
github.com/moses-smt/mosesdecoder.git
author     (no author) <(no author)@1f5c12ca-751b-0410-a591-d2e778427230>  2006-08-15 09:41:21 +0400
committer  (no author) <(no author)@1f5c12ca-751b-0410-a591-d2e778427230>  2006-08-15 09:41:21 +0400
commit     febe2e6dfa22ded9ebc50c3490dbcdf979cb41c7 (patch)
tree       14c1cd15f468e98b68ac33ba8eb15b24ab479733
parent     467fc4c97c9163b22d0fb27a2c2f1806bee90718 (diff)

This commit was manufactured by cvs2svn to create tag 'Root_HIEUS_BRANCH_2006_08_15'.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/tags/Root_HIEUS_BRANCH_2006_08_15@752 1f5c12ca-751b-0410-a591-d2e778427230
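
The hashes above identify the cvs2svn tag commit and its parent in the git-svn mirror of the old SourceForge Subversion repository. As a minimal sketch, assuming a local clone of this mirror, the same commit record and the per-file summary that follows can be reproduced with:

    # Show the commit message plus a per-file change summary for the tag commit.
    git show --stat febe2e6dfa22ded9ebc50c3490dbcdf979cb41c7

    # Confirm the parent commit recorded above.
    git log -1 --format='%H %P' febe2e6dfa22ded9ebc50c3490dbcdf979cb41c7
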
-rw-r--r--  CVSROOT/checkoutlist  13
-rw-r--r--  CVSROOT/commitinfo  15
-rw-r--r--  CVSROOT/config  21
-rw-r--r--  CVSROOT/cvswrappers  19
-rw-r--r--  CVSROOT/editinfo  21
-rw-r--r--  CVSROOT/loginfo  37
-rw-r--r--  CVSROOT/modules  26
-rw-r--r--  CVSROOT/notify  12
-rw-r--r--  CVSROOT/rcsinfo  13
-rw-r--r--  CVSROOT/taginfo  20
-rw-r--r--  CVSROOT/verifymsg  21
-rw-r--r--  irstlm/.cdtbuild  33
-rw-r--r--  irstlm/.cdtproject  15
-rw-r--r--  irstlm/.cvsignore  6
-rw-r--r--  irstlm/.project  19
-rw-r--r--  irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs  12
-rw-r--r--  irstlm/Makefile.am  5
-rw-r--r--  irstlm/README  14
-rw-r--r--  irstlm/config.h.in  22
-rw-r--r--  irstlm/configure.in  11
-rwxr-xr-x  irstlm/depcomp  522
-rwxr-xr-x  irstlm/install-sh  322
-rw-r--r--  irstlm/irstlm.vcproj  255
-rwxr-xr-x  irstlm/missing  353
-rw-r--r--  irstlm/src/Makefile.am  20
-rw-r--r--  irstlm/src/cmd.c  661
-rw-r--r--  irstlm/src/cmd.h  68
-rw-r--r--  irstlm/src/compile-lm.cpp  189
-rw-r--r--  irstlm/src/dictionary.cpp  418
-rw-r--r--  irstlm/src/dictionary.h  186
-rw-r--r--  irstlm/src/htable.cpp  329
-rw-r--r--  irstlm/src/htable.h  130
-rw-r--r--  irstlm/src/index.h  19
-rw-r--r--  irstlm/src/lmtable.cpp  1198
-rw-r--r--  irstlm/src/lmtable.h  365
-rw-r--r--  irstlm/src/mempool.cpp  496
-rw-r--r--  irstlm/src/mempool.h  172
-rw-r--r--  irstlm/src/n_gram.cpp  214
-rw-r--r--  irstlm/src/n_gram.h  117
-rw-r--r--  irstlm/src/ngramcache.cpp  85
-rw-r--r--  irstlm/src/ngramcache.h  50
-rw-r--r--  irstlm/src/quantize-lm.cpp  388
-rw-r--r--  misc/.project  11
-rw-r--r--  misc/GenerateTuples.cpp  294
-rw-r--r--  misc/GenerateTuples.h  12
-rw-r--r--  misc/Makefile  26
-rw-r--r--  misc/java-utils/.classpath  6
-rw-r--r--  misc/java-utils/.cvsignore  1
-rw-r--r--  misc/java-utils/.project  17
-rw-r--r--  misc/java-utils/CombineTags.java  89
-rw-r--r--  misc/java-utils/ProcessShallowParse.java  82
-rw-r--r--  misc/java-utils/ShrinkSentence.java  48
-rw-r--r--  misc/java-utils/TagHierarchy.java  135
-rw-r--r--  misc/misc.vcproj  174
-rw-r--r--  misc/processPhraseTable.cpp  193
-rw-r--r--  moses-cmd/.cdtbuild  140
-rw-r--r--  moses-cmd/.cdtproject  15
-rw-r--r--  moses-cmd/.cvsignore  11
-rw-r--r--  moses-cmd/.project  21
-rw-r--r--  moses-cmd/.settings/org.eclipse.cdt.managedbuilder.core.prefs  16
-rw-r--r--  moses-cmd/Makefile.am  5
-rw-r--r--  moses-cmd/Makefile.in  574
-rw-r--r--  moses-cmd/aclocal.m4  1044
-rw-r--r--  moses-cmd/acsite.m4  3
-rw-r--r--  moses-cmd/config.h.in  58
-rw-r--r--  moses-cmd/config.in  93
-rw-r--r--  moses-cmd/config/mysql++.m4  130
-rw-r--r--  moses-cmd/config/mysql-client.m4  133
-rwxr-xr-x  moses-cmd/configure  5846
-rw-r--r--  moses-cmd/configure.in  129
-rwxr-xr-x  moses-cmd/depcomp  441
-rwxr-xr-x  moses-cmd/install-sh  276
-rwxr-xr-x  moses-cmd/missing  336
-rw-r--r--  moses-cmd/moses-cmd.vcproj  231
-rwxr-xr-x  moses-cmd/regenerate-makefiles.sh  12
-rwxr-xr-x  moses-cmd/src/IOCommandLine.cpp  232
-rwxr-xr-x  moses-cmd/src/IOCommandLine.h  90
-rwxr-xr-x  moses-cmd/src/IOFile.cpp  66
-rwxr-xr-x  moses-cmd/src/IOFile.h  60
-rw-r--r--  moses-cmd/src/Main.cpp  202
-rw-r--r--  moses-cmd/src/Main.h  42
-rw-r--r--  moses-cmd/src/Makefile  419
-rw-r--r--  moses-cmd/src/Makefile.am  11
-rw-r--r--  moses-cmd/src/Makefile.in  419
-rw-r--r--  moses-cmd/src/TranslationAnalysis.cpp  115
-rw-r--r--  moses-cmd/src/TranslationAnalysis.h  25
-rw-r--r--  moses/configure.in  2
-rw-r--r--  regression-testing/.project  11
-rw-r--r--  regression-testing/MosesRegressionTesting.pm  75
-rwxr-xr-x  regression-testing/compare-results.pl  82
-rwxr-xr-x  regression-testing/run-single-test.pl  135
-rwxr-xr-x  regression-testing/run-test-suite  93
-rwxr-xr-x  regression-testing/tests/basic-surface-binptable/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/basic-surface-binptable/filter-stdout  7
-rw-r--r--  regression-testing/tests/basic-surface-binptable/moses.ini  52
-rw-r--r--  regression-testing/tests/basic-surface-binptable/to-translate  5
-rw-r--r--  regression-testing/tests/basic-surface-binptable/truth/results.dat  13
-rwxr-xr-x  regression-testing/tests/basic-surface-only/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/basic-surface-only/filter-stdout  7
-rw-r--r--  regression-testing/tests/basic-surface-only/moses.ini  45
-rw-r--r--  regression-testing/tests/basic-surface-only/to-translate  5
-rw-r--r--  regression-testing/tests/basic-surface-only/truth/results.dat  13
-rwxr-xr-x  regression-testing/tests/confusionNet-surface-only/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/confusionNet-surface-only/filter-stdout  7
-rw-r--r--  regression-testing/tests/confusionNet-surface-only/moses.ini  56
-rw-r--r--  regression-testing/tests/confusionNet-surface-only/to-translate  15
-rw-r--r--  regression-testing/tests/confusionNet-surface-only/to-translate.new  10
-rw-r--r--  regression-testing/tests/confusionNet-surface-only/to-translate.orig  10
-rw-r--r--  regression-testing/tests/confusionNet-surface-only/truth/results.dat  9
-rwxr-xr-x  regression-testing/tests/multi-factor-binptable/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/multi-factor-binptable/filter-stdout  7
-rw-r--r--  regression-testing/tests/multi-factor-binptable/moses.ini  63
-rw-r--r--  regression-testing/tests/multi-factor-binptable/to-translate  1
-rw-r--r--  regression-testing/tests/multi-factor-binptable/truth/results.dat  5
-rwxr-xr-x  regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl  29
-rwxr-xr-x  regression-testing/tests/multi-factor-drop/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/multi-factor-drop/filter-stdout  7
-rw-r--r--  regression-testing/tests/multi-factor-drop/moses.ini  71
-rw-r--r--  regression-testing/tests/multi-factor-drop/to-translate  1
-rw-r--r--  regression-testing/tests/multi-factor-drop/truth/results.dat  5
-rwxr-xr-x  regression-testing/tests/multi-factor/filter-stderr  22
-rwxr-xr-x  regression-testing/tests/multi-factor/filter-stdout  7
-rw-r--r--  regression-testing/tests/multi-factor/moses.ini  63
-rw-r--r--  regression-testing/tests/multi-factor/moses2.ini  63
-rw-r--r--  regression-testing/tests/multi-factor/to-translate  1
-rw-r--r--  regression-testing/tests/multi-factor/truth/results.dat  5
-rw-r--r--  regression-testing/tests/perllib/RegTestUtils.pm  31
-rwxr-xr-x  regression-testing/tests/ptable-filtering/filter-stderr  29
-rwxr-xr-x  regression-testing/tests/ptable-filtering/filter-stdout  2
-rw-r--r--  regression-testing/tests/ptable-filtering/moses.ini  59
-rw-r--r--  regression-testing/tests/ptable-filtering/to-translate  1
-rw-r--r--  regression-testing/tests/ptable-filtering/truth/results.dat  22
-rw-r--r--  scripts/.cvsignore  1
-rw-r--r--  scripts/Makefile  147
-rw-r--r--  scripts/README  15
-rw-r--r--  scripts/analysis/README  6
-rwxr-xr-x  scripts/analysis/nontranslated_words.pl  89
-rw-r--r--  scripts/analysis/perllib/Error.pm  744
-rwxr-xr-x  scripts/analysis/sentence-by-sentence.pl  447
-rw-r--r--  scripts/analysis/smtgui/Corpus.pm  1311
-rw-r--r--  scripts/analysis/smtgui/README  42
-rw-r--r--  scripts/analysis/smtgui/file-descriptions  4
-rw-r--r--  scripts/analysis/smtgui/file-factors  8
-rw-r--r--  scripts/analysis/smtgui/filter-phrase-table.pl  83
-rwxr-xr-x  scripts/analysis/smtgui/newsmtgui.cgi  996
-rwxr-xr-x  scripts/generic/extract-factors.pl  19
-rwxr-xr-x  scripts/generic/lopar2pos.pl  14
-rwxr-xr-x  scripts/generic/moses-parallel.pl  439
-rwxr-xr-x  scripts/generic/multi-bleu.perl  121
-rwxr-xr-x  scripts/generic/qsub-wrapper.pl  200
-rw-r--r--  scripts/released-files  37
-rw-r--r--  scripts/tests/README  28
-rw-r--r--  scripts/tests/epps-sample/epps.en  100
-rw-r--r--  scripts/tests/epps-sample/epps.es  100
-rw-r--r--  scripts/tests/epps-sample/giza.en-es/en-es.A3.final.gz  bin 17049 -> 0 bytes
-rw-r--r--  scripts/tests/epps-sample/giza.es-en/es-en.A3.final.gz  bin 17299 -> 0 bytes
-rwxr-xr-x  scripts/tests/mert-moses-parallel.test  24
-rwxr-xr-x  scripts/tests/mert-moses-serial.test  20
-rwxr-xr-x  scripts/tests/train-factored-test-step3.test  23
-rwxr-xr-x  scripts/tests/train-factored-test-step9.test  26
-rwxr-xr-x  scripts/training/absolutize_moses_model.pl  85
-rwxr-xr-x  scripts/training/analyse_moses_model.pl  130
-rwxr-xr-x  scripts/training/build-generation-table.perl  115
-rwxr-xr-x  scripts/training/clean-corpus-n.perl  98
-rwxr-xr-x  scripts/training/clone_moses_model.pl  102
-rwxr-xr-x  scripts/training/cmert-0.5/Makefile  11
-rwxr-xr-x  scripts/training/cmert-0.5/README  9
-rwxr-xr-x  scripts/training/cmert-0.5/bleu.py  178
-rwxr-xr-x  scripts/training/cmert-0.5/data.c  92
-rwxr-xr-x  scripts/training/cmert-0.5/data.h  17
-rwxr-xr-x  scripts/training/cmert-0.5/dataset.py  391
-rwxr-xr-x  scripts/training/cmert-0.5/log.py  17
-rwxr-xr-x  scripts/training/cmert-0.5/makeinitopt  27
-rwxr-xr-x  scripts/training/cmert-0.5/mert-driver  81
-rwxr-xr-x  scripts/training/cmert-0.5/mert.c  430
-rwxr-xr-x  scripts/training/cmert-0.5/point.c  116
-rwxr-xr-x  scripts/training/cmert-0.5/point.h  25
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/__init__.py  57
-rwxr-xr-x  scripts/training/cmert-0.5/python/psyco/_psyco.so  bin 839774 -> 0 bytes
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/classes.py  53
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/core.py  232
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/kdictproxy.py  133
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/logger.py  90
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/profiler.py  388
-rw-r--r--  scripts/training/cmert-0.5/python/psyco/support.py  196
-rwxr-xr-x  scripts/training/cmert-0.5/run-cmert  8
-rwxr-xr-x  scripts/training/cmert-0.5/score-nbest.py  100
-rwxr-xr-x  scripts/training/cmert-0.5/score.c  33
-rwxr-xr-x  scripts/training/cmert-0.5/score.h  9
-rwxr-xr-x  scripts/training/filter-model-given-input.pl  206
-rwxr-xr-x  scripts/training/mert-moses.pl  956
-rw-r--r--  scripts/training/phrase-extract/Makefile  5
-rw-r--r--  scripts/training/phrase-extract/extract.cpp  286
-rw-r--r--  scripts/training/phrase-extract/score.cpp  323
-rw-r--r--  scripts/training/phrase-extract/tables-core.cpp  102
-rw-r--r--  scripts/training/phrase-extract/tables-core.h  58
-rwxr-xr-x  scripts/training/postprocess-lopar.perl  92
-rwxr-xr-x  scripts/training/reduce_combine.pl  93
-rw-r--r--  scripts/training/symal/Makefile  13
-rw-r--r--  scripts/training/symal/cmd.c  642
-rw-r--r--  scripts/training/symal/cmd.h  49
-rwxr-xr-x  scripts/training/symal/giza2bal.pl  96
-rw-r--r--  scripts/training/symal/symal.cpp  394
-rwxr-xr-x  scripts/training/train-factored-phrase-model.perl  1397
204 files changed, 1 insertion, 33429 deletions
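
The full per-file diffs follow. As another sketch against the same assumed local clone, the 204-file summary above can be regenerated directly from the parent and commit IDs listed in the header:

    # Diffstat between the parent commit and the cvs2svn tag commit.
    git diff --stat 467fc4c97c9163b22d0fb27a2c2f1806bee90718 febe2e6dfa22ded9ebc50c3490dbcdf979cb41c7
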
diff --git a/CVSROOT/checkoutlist b/CVSROOT/checkoutlist
deleted file mode 100644
index 2921bffcd..000000000
--- a/CVSROOT/checkoutlist
+++ /dev/null
@@ -1,13 +0,0 @@
-# The "checkoutlist" file is used to support additional version controlled
-# administrative files in $CVSROOT/CVSROOT, such as template files.
-#
-# The first entry on a line is a filename which will be checked out from
-# the corresponding RCS file in the $CVSROOT/CVSROOT directory.
-# The remainder of the line is an error message to use if the file cannot
-# be checked out.
-#
-# File format:
-#
-# [<whitespace>]<filename>[<whitespace><error message>]<end-of-line>
-#
-# comment lines begin with '#'
diff --git a/CVSROOT/commitinfo b/CVSROOT/commitinfo
deleted file mode 100644
index b19e7b7a6..000000000
--- a/CVSROOT/commitinfo
+++ /dev/null
@@ -1,15 +0,0 @@
-# The "commitinfo" file is used to control pre-commit checks.
-# The filter on the right is invoked with the repository and a list
-# of files to check. A non-zero exit of the filter program will
-# cause the commit to be aborted.
-#
-# The first entry on a line is a regular expression which is tested
-# against the directory that the change is being committed to, relative
-# to the $CVSROOT. For the first match that is found, then the remainder
-# of the line is the name of the filter to run.
-#
-# If the repository name does not match any of the regular expressions in this
-# file, the "DEFAULT" line is used, if it is specified.
-#
-# If the name "ALL" appears as a regular expression it is always used
-# in addition to the first matching regex or "DEFAULT".
diff --git a/CVSROOT/config b/CVSROOT/config
deleted file mode 100644
index 92c150bf7..000000000
--- a/CVSROOT/config
+++ /dev/null
@@ -1,21 +0,0 @@
-# Set this to "no" if pserver shouldn't check system users/passwords
-#SystemAuth=no
-
-# Put CVS lock files in this directory rather than directly in the repository.
-#LockDir=/var/lock/cvs
-
-# Set `TopLevelAdmin' to `yes' to create a CVS directory at the top
-# level of the new working directory when using the `cvs checkout'
-# command.
-#TopLevelAdmin=no
-
-# Set `LogHistory' to `all' or `TOEFWUPCGMAR' to log all transactions to the
-# history file, or a subset as needed (ie `TMAR' logs all write operations)
-#LogHistory=TOEFWUPCGMAR
-
-# Set `RereadLogAfterVerify' to `always' (the default) to allow the verifymsg
-# script to change the log message. Set it to `stat' to force CVS to verify# that the file has changed before reading it (this can take up to an extra
-# second per directory being committed, so it is not recommended for large
-# repositories. Set it to `never' (the previous CVS behavior) to prevent
-# verifymsg scripts from changing the log message.
-#RereadLogAfterVerify=always
diff --git a/CVSROOT/cvswrappers b/CVSROOT/cvswrappers
deleted file mode 100644
index e989b7545..000000000
--- a/CVSROOT/cvswrappers
+++ /dev/null
@@ -1,19 +0,0 @@
-# This file affects handling of files based on their names.
-#
-# The -m option specifies whether CVS attempts to merge files.
-#
-# The -k option specifies keyword expansion (e.g. -kb for binary).
-#
-# Format of wrapper file ($CVSROOT/CVSROOT/cvswrappers or .cvswrappers)
-#
-# wildcard [option value][option value]...
-#
-# where option is one of
-# -f from cvs filter value: path to filter
-# -t to cvs filter value: path to filter
-# -m update methodology value: MERGE or COPY
-# -k expansion mode value: b, o, kkv, &c
-#
-# and value is a single-quote delimited value.
-# For example:
-#*.gif -k 'b'
diff --git a/CVSROOT/editinfo b/CVSROOT/editinfo
deleted file mode 100644
index d78886c15..000000000
--- a/CVSROOT/editinfo
+++ /dev/null
@@ -1,21 +0,0 @@
-# The "editinfo" file is used to allow verification of logging
-# information. It works best when a template (as specified in the
-# rcsinfo file) is provided for the logging procedure. Given a
-# template with locations for, a bug-id number, a list of people who
-# reviewed the code before it can be checked in, and an external
-# process to catalog the differences that were code reviewed, the
-# following test can be applied to the code:
-#
-# Making sure that the entered bug-id number is correct.
-# Validating that the code that was reviewed is indeed the code being
-# checked in (using the bug-id number or a seperate review
-# number to identify this particular code set.).
-#
-# If any of the above test failed, then the commit would be aborted.
-#
-# Actions such as mailing a copy of the report to each reviewer are
-# better handled by an entry in the loginfo file.
-#
-# One thing that should be noted is the the ALL keyword is not
-# supported. There can be only one entry that matches a given
-# repository.
diff --git a/CVSROOT/loginfo b/CVSROOT/loginfo
deleted file mode 100644
index 84be834f3..000000000
--- a/CVSROOT/loginfo
+++ /dev/null
@@ -1,37 +0,0 @@
-# The "loginfo" file controls where "cvs commit" log information
-# is sent. The first entry on a line is a regular expression which must match
-# the directory that the change is being made to, relative to the
-# $CVSROOT. If a match is found, then the remainder of the line is a filter
-# program that should expect log information on its standard input.
-#
-# If the repository name does not match any of the regular expressions in this
-# file, the "DEFAULT" line is used, if it is specified.
-#
-# If the name ALL appears as a regular expression it is always used
-# in addition to the first matching regex or DEFAULT.
-#
-# You may specify a format string as part of the
-# filter. The string is composed of a `%' followed
-# by a single format character, or followed by a set of format
-# characters surrounded by `{' and `}' as separators. The format
-# characters are:
-#
-# s = file name
-# V = old version number (pre-checkin)
-# v = new version number (post-checkin)
-# t = tag or branch name
-#
-# For example:
-#DEFAULT (echo ""; id; echo %s; date; cat) >> $CVSROOT/CVSROOT/commitlog
-# or
-#DEFAULT (echo ""; id; echo %{sVv}; date; cat) >> $CVSROOT/CVSROOT/commitlog
-
-# This line sends all changes to the CVSROOT module to the user specified
-# by USERNAME. It is recommended that someone be watching this module
-# as it shouldn't need to be modified very often.
-CVSROOT /cvsroot/sitedocs/CVSROOT/cvstools/syncmail %{sVv} redpony@gmail.com
-
-# This sends mail to a mailing list, defined by the PROJECTNAME-LISTNAME
-# value. Any changes to any modules in the project will thus generate an
-# email message to the mailing list specified.
-DEFAULT /cvsroot/sitedocs/CVSROOT/cvstools/syncmail %{sVv} redpony@gmail.com,brooke@csail.mit.edu,callison-burch@ed.ac.uk,hieu@hoang.co.uk,corbett@csail.mit.edu
diff --git a/CVSROOT/modules b/CVSROOT/modules
deleted file mode 100644
index cb9e9efc9..000000000
--- a/CVSROOT/modules
+++ /dev/null
@@ -1,26 +0,0 @@
-# Three different line formats are valid:
-# key -a aliases...
-# key [options] directory
-# key [options] directory files...
-#
-# Where "options" are composed of:
-# -i prog Run "prog" on "cvs commit" from top-level of module.
-# -o prog Run "prog" on "cvs checkout" of module.
-# -e prog Run "prog" on "cvs export" of module.
-# -t prog Run "prog" on "cvs rtag" of module.
-# -u prog Run "prog" on "cvs update" of module.
-# -d dir Place module in directory "dir" instead of module name.
-# -l Top-level directory only -- do not recurse.
-#
-# NOTE: If you change any of the "Run" options above, you'll have to
-# release and re-checkout any working directories of these modules.
-#
-# And "directory" is a path to a directory relative to $CVSROOT.
-#
-# The "-a" option specifies an alias. An alias is interpreted as if
-# everything on the right of the "-a" had been typed on the command line.
-#
-# You can encode a module within a module by using the special '&'
-# character to interpose another module into the current module. This
-# can be useful for creating a module that consists of many directories
-# spread out over the entire source repository.
diff --git a/CVSROOT/notify b/CVSROOT/notify
deleted file mode 100644
index 74ae6f9e9..000000000
--- a/CVSROOT/notify
+++ /dev/null
@@ -1,12 +0,0 @@
-# The "notify" file controls where notifications from watches set by
-# "cvs watch add" or "cvs edit" are sent. The first entry on a line is
-# a regular expression which is tested against the directory that the
-# change is being made to, relative to the $CVSROOT. If it matches,
-# then the remainder of the line is a filter program that should contain
-# one occurrence of %s for the user to notify, and information on its
-# standard input.
-#
-# "ALL" or "DEFAULT" can be used in place of the regular expression.
-#
-# For example:
-#ALL mail -s "CVS notification" %s
diff --git a/CVSROOT/rcsinfo b/CVSROOT/rcsinfo
deleted file mode 100644
index 49e59f4d0..000000000
--- a/CVSROOT/rcsinfo
+++ /dev/null
@@ -1,13 +0,0 @@
-# The "rcsinfo" file is used to control templates with which the editor
-# is invoked on commit and import.
-#
-# The first entry on a line is a regular expression which is tested
-# against the directory that the change is being made to, relative to the
-# $CVSROOT. For the first match that is found, then the remainder of the
-# line is the name of the file that contains the template.
-#
-# If the repository name does not match any of the regular expressions in this
-# file, the "DEFAULT" line is used, if it is specified.
-#
-# If the name "ALL" appears as a regular expression it is always used
-# in addition to the first matching regex or "DEFAULT".
diff --git a/CVSROOT/taginfo b/CVSROOT/taginfo
deleted file mode 100644
index 274a46dd5..000000000
--- a/CVSROOT/taginfo
+++ /dev/null
@@ -1,20 +0,0 @@
-# The "taginfo" file is used to control pre-tag checks.
-# The filter on the right is invoked with the following arguments:
-#
-# $1 -- tagname
-# $2 -- operation "add" for tag, "mov" for tag -F, and "del" for tag -d
-# $3 -- repository
-# $4-> file revision [file revision ...]
-#
-# A non-zero exit of the filter program will cause the tag to be aborted.
-#
-# The first entry on a line is a regular expression which is tested
-# against the directory that the change is being committed to, relative
-# to the $CVSROOT. For the first match that is found, then the remainder
-# of the line is the name of the filter to run.
-#
-# If the repository name does not match any of the regular expressions in this
-# file, the "DEFAULT" line is used, if it is specified.
-#
-# If the name "ALL" appears as a regular expression it is always used
-# in addition to the first matching regex or "DEFAULT".
diff --git a/CVSROOT/verifymsg b/CVSROOT/verifymsg
deleted file mode 100644
index 86f747ce2..000000000
--- a/CVSROOT/verifymsg
+++ /dev/null
@@ -1,21 +0,0 @@
-# The "verifymsg" file is used to allow verification of logging
-# information. It works best when a template (as specified in the
-# rcsinfo file) is provided for the logging procedure. Given a
-# template with locations for, a bug-id number, a list of people who
-# reviewed the code before it can be checked in, and an external
-# process to catalog the differences that were code reviewed, the
-# following test can be applied to the code:
-#
-# Making sure that the entered bug-id number is correct.
-# Validating that the code that was reviewed is indeed the code being
-# checked in (using the bug-id number or a seperate review
-# number to identify this particular code set.).
-#
-# If any of the above test failed, then the commit would be aborted.
-#
-# Actions such as mailing a copy of the report to each reviewer are
-# better handled by an entry in the loginfo file.
-#
-# One thing that should be noted is the the ALL keyword is not
-# supported. There can be only one entry that matches a given
-# repository.
diff --git a/irstlm/.cdtbuild b/irstlm/.cdtbuild
deleted file mode 100644
index bb5ffe408..000000000
--- a/irstlm/.cdtbuild
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?fileVersion 3.0.0?>
-
-<ManagedProjectBuildInfo>
-<project id="irstlm.cdt.managedbuild.target.gnu.lib.1070956508" name="Static Library (Gnu)" projectType="cdt.managedbuild.target.gnu.lib">
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.8750958" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1732402088" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.208381076" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
-<option id="gnu.cpp.compiler.option.debugging.gprof.1713594612" superClass="gnu.cpp.compiler.option.debugging.gprof" value="true" valueType="boolean"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.1727542516" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.1884793796" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/>
-<macros/>
-</toolChain>
-<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.lib.debug.8750958.1054827022" name="compile-lm.cpp" rcbsApplicability="disable" resourcePath="/irstlm/src/compile-lm.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185./irstlm/src/compile-lm.cpp">
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185./irstlm/src/compile-lm.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185"/>
-</resourceConfiguration>
-</configuration>
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.1538178030" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.508823597" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.723647841" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.1518934657" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.1672118671" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/>
-<macros/>
-</toolChain>
-<resourceConfiguration exclude="true" id="cdt.managedbuild.config.gnu.lib.release.1538178030.1743143035" name="compile-lm.cpp" rcbsApplicability="disable" resourcePath="/irstlm/src/compile-lm.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207./irstlm/src/compile-lm.cpp">
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207./irstlm/src/compile-lm.cpp" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207"/>
-</resourceConfiguration>
-</configuration>
-</project>
-</ManagedProjectBuildInfo>
diff --git a/irstlm/.cdtproject b/irstlm/.cdtproject
deleted file mode 100644
index 9d4253d9e..000000000
--- a/irstlm/.cdtproject
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?eclipse-cdt version="2.0"?>
-
-<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
-<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
-<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-<extension id="org.eclipse.cdt.core.nullindexer" point="org.eclipse.cdt.core.CIndexer"/>
-<data>
-<item id="org.eclipse.cdt.core.pathentry">
-<pathentry kind="src" path=""/>
-<pathentry kind="out" path=""/>
-<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/>
-</item>
-</data>
-</cdtproject>
diff --git a/irstlm/.cvsignore b/irstlm/.cvsignore
deleted file mode 100644
index 5a0739eb4..000000000
--- a/irstlm/.cvsignore
+++ /dev/null
@@ -1,6 +0,0 @@
-configure
-.cvsignore
-autom4te.cache
-aclocal.m4
-Debug*
-Release*
diff --git a/irstlm/.project b/irstlm/.project
deleted file mode 100644
index bb14e64fb..000000000
--- a/irstlm/.project
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>irstlm</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- </natures>
-</projectDescription>
diff --git a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs b/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs
deleted file mode 100644
index 18032696b..000000000
--- a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs
+++ /dev/null
@@ -1,12 +0,0 @@
-#Thu Aug 10 18:12:03 EDT 2006
-=\=\=\=\=\=\=
-<<<<<<<=org.eclipse.cdt.managedbuilder.core.prefs
->>>>>>>=1.2
-eclipse.preferences.version=1
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.1333974501=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.32089835=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/project=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
diff --git a/irstlm/Makefile.am b/irstlm/Makefile.am
deleted file mode 100644
index 4566b2680..000000000
--- a/irstlm/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-# not a GNU package. You can remove this line, if
-# have all needed files, that a GNU package needs
-AUTOMAKE_OPTIONS = foreign
-SUBDIRS = src
-
diff --git a/irstlm/README b/irstlm/README
deleted file mode 100644
index 17a9b920f..000000000
--- a/irstlm/README
+++ /dev/null
@@ -1,14 +0,0 @@
-To build:
-
- aclocal
- autoconf
- automake
-
- ./configure --with-prefix=PATH TO INSTALL (probably `pwd`)
- make
- make install
-
-*Make install is important since it creates the include/ and lib/ directories
-that client software will depend on.
-
-
diff --git a/irstlm/config.h.in b/irstlm/config.h.in
deleted file mode 100644
index b292ea963..000000000
--- a/irstlm/config.h.in
+++ /dev/null
@@ -1,22 +0,0 @@
-/* config.h.in. Generated from configure.in by autoheader. */
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* Version number of package */
-#undef VERSION
diff --git a/irstlm/configure.in b/irstlm/configure.in
deleted file mode 100644
index c2ad8dda5..000000000
--- a/irstlm/configure.in
+++ /dev/null
@@ -1,11 +0,0 @@
-AC_INIT(src)
-
-AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(irstlm, 1.0)
-
-AC_PROG_CXX
-AC_LANG_CPLUSPLUS
-AC_PROG_RANLIB
-#AM_PROG_LIBTOOL
-
-AC_OUTPUT(Makefile src/Makefile)
diff --git a/irstlm/depcomp b/irstlm/depcomp
deleted file mode 100755
index 11e2d3bfe..000000000
--- a/irstlm/depcomp
+++ /dev/null
@@ -1,522 +0,0 @@
-#! /bin/sh
-# depcomp - compile a program generating dependencies as side-effects
-
-scriptversion=2004-05-31.23
-
-# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
-
-case $1 in
- '')
- echo "$0: No command. Try \`$0 --help' for more information." 1>&2
- exit 1;
- ;;
- -h | --h*)
- cat <<\EOF
-Usage: depcomp [--help] [--version] PROGRAM [ARGS]
-
-Run PROGRAMS ARGS to compile a file, generating dependencies
-as side-effects.
-
-Environment variables:
- depmode Dependency tracking mode.
- source Source file read by `PROGRAMS ARGS'.
- object Object file output by `PROGRAMS ARGS'.
- DEPDIR directory where to store dependencies.
- depfile Dependency file to output.
- tmpdepfile Temporary file to use when outputing dependencies.
- libtool Whether libtool is used (yes/no).
-
-Report bugs to <bug-automake@gnu.org>.
-EOF
- exit 0
- ;;
- -v | --v*)
- echo "depcomp $scriptversion"
- exit 0
- ;;
-esac
-
-if test -z "$depmode" || test -z "$source" || test -z "$object"; then
- echo "depcomp: Variables source, object and depmode must be set" 1>&2
- exit 1
-fi
-
-# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
-depfile=${depfile-`echo "$object" |
- sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
-tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
-
-rm -f "$tmpdepfile"
-
-# Some modes work just like other modes, but use different flags. We
-# parameterize here, but still list the modes in the big case below,
-# to make depend.m4 easier to write. Note that we *cannot* use a case
-# here, because this file can only contain one case statement.
-if test "$depmode" = hp; then
- # HP compiler uses -M and no extra arg.
- gccflag=-M
- depmode=gcc
-fi
-
-if test "$depmode" = dashXmstdout; then
- # This is just like dashmstdout with a different argument.
- dashmflag=-xM
- depmode=dashmstdout
-fi
-
-case "$depmode" in
-gcc3)
-## gcc 3 implements dependency tracking that does exactly what
-## we want. Yay! Note: for some reason libtool 1.4 doesn't like
-## it if -MD -MP comes after the -MF stuff. Hmm.
- "$@" -MT "$object" -MD -MP -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- mv "$tmpdepfile" "$depfile"
- ;;
-
-gcc)
-## There are various ways to get dependency output from gcc. Here's
-## why we pick this rather obscure method:
-## - Don't want to use -MD because we'd like the dependencies to end
-## up in a subdir. Having to rename by hand is ugly.
-## (We might end up doing this anyway to support other compilers.)
-## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
-## -MM, not -M (despite what the docs say).
-## - Using -M directly means running the compiler twice (even worse
-## than renaming).
- if test -z "$gccflag"; then
- gccflag=-MD,
- fi
- "$@" -Wp,"$gccflag$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-## The second -e expression handles DOS-style file names with drive letters.
- sed -e 's/^[^:]*: / /' \
- -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
-## This next piece of magic avoids the `deleted header file' problem.
-## The problem is that when a header file which appears in a .P file
-## is deleted, the dependency causes make to die (because there is
-## typically no way to rebuild the header). We avoid this by adding
-## dummy dependencies for each header file. Too bad gcc doesn't do
-## this for us directly.
- tr ' ' '
-' < "$tmpdepfile" |
-## Some versions of gcc put a space before the `:'. On the theory
-## that the space means something, we add a space to the output as
-## well.
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-hp)
- # This case exists only to let depend.m4 do its work. It works by
- # looking at the text of this script. This case will never be run,
- # since it is checked for above.
- exit 1
- ;;
-
-sgi)
- if test "$libtool" = yes; then
- "$@" "-Wp,-MDupdate,$tmpdepfile"
- else
- "$@" -MDupdate "$tmpdepfile"
- fi
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
-
- if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
- echo "$object : \\" > "$depfile"
-
- # Clip off the initial element (the dependent). Don't try to be
- # clever and replace this with sed code, as IRIX sed won't handle
- # lines with more than a fixed number of characters (4096 in
- # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
- # the IRIX cc adds comments like `#:fec' to the end of the
- # dependency line.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
- tr '
-' ' ' >> $depfile
- echo >> $depfile
-
- # The second pass generates a dummy entry for each header file.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
- >> $depfile
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-aix)
- # The C for AIX Compiler uses -M and outputs the dependencies
- # in a .u file. In older versions, this file always lives in the
- # current directory. Also, the AIX compiler puts `$object:' at the
- # start of each line; $object doesn't have directory information.
- # Version 6 uses the directory in both cases.
- stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'`
- tmpdepfile="$stripped.u"
- if test "$libtool" = yes; then
- "$@" -Wc,-M
- else
- "$@" -M
- fi
- stat=$?
-
- if test -f "$tmpdepfile"; then :
- else
- stripped=`echo "$stripped" | sed 's,^.*/,,'`
- tmpdepfile="$stripped.u"
- fi
-
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
-
- if test -f "$tmpdepfile"; then
- outname="$stripped.o"
- # Each line is of the form `foo.o: dependent.h'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile"
- sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-icc)
- # Intel's C compiler understands `-MD -MF file'. However on
- # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
- # ICC 7.0 will fill foo.d with something like
- # foo.o: sub/foo.c
- # foo.o: sub/foo.h
- # which is wrong. We want:
- # sub/foo.o: sub/foo.c
- # sub/foo.o: sub/foo.h
- # sub/foo.c:
- # sub/foo.h:
- # ICC 7.1 will output
- # foo.o: sub/foo.c sub/foo.h
- # and will wrap long lines using \ :
- # foo.o: sub/foo.c ... \
- # sub/foo.h ... \
- # ...
-
- "$@" -MD -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- # Each line is of the form `foo.o: dependent.h',
- # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
- # Some versions of the HPUX 10.20 sed can't process this invocation
- # correctly. Breaking it into two sed invocations is a workaround.
- sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
- sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-tru64)
- # The Tru64 compiler uses -MD to generate dependencies as a side
- # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
- # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
- # dependencies in `foo.d' instead, so we check for that too.
- # Subdirectories are respected.
- dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
- test "x$dir" = "x$object" && dir=
- base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
-
- if test "$libtool" = yes; then
- # Dependencies are output in .lo.d with libtool 1.4.
- # With libtool 1.5 they are output both in $dir.libs/$base.o.d
- # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the
- # latter, because the former will be cleaned when $dir.libs is
- # erased.
- tmpdepfile1="$dir.libs/$base.lo.d"
- tmpdepfile2="$dir$base.o.d"
- tmpdepfile3="$dir.libs/$base.d"
- "$@" -Wc,-MD
- else
- tmpdepfile1="$dir$base.o.d"
- tmpdepfile2="$dir$base.d"
- tmpdepfile3="$dir$base.d"
- "$@" -MD
- fi
-
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
- exit $stat
- fi
-
- if test -f "$tmpdepfile1"; then
- tmpdepfile="$tmpdepfile1"
- elif test -f "$tmpdepfile2"; then
- tmpdepfile="$tmpdepfile2"
- else
- tmpdepfile="$tmpdepfile3"
- fi
- if test -f "$tmpdepfile"; then
- sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
- # That's a tab and a space in the [].
- sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
- else
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-#nosideeffect)
- # This comment above is used by automake to tell side-effect
- # dependency tracking mechanisms from slower ones.
-
-dashmstdout)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- test -z "$dashmflag" && dashmflag=-M
- # Require at least two characters before searching for `:'
- # in the target name. This is to cope with DOS-style filenames:
- # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
- "$@" $dashmflag |
- sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- tr ' ' '
-' < "$tmpdepfile" | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-dashXmstdout)
- # This case only exists to satisfy depend.m4. It is never actually
- # run, as this mode is specially recognized in the preamble.
- exit 1
- ;;
-
-makedepend)
- "$@" || exit $?
- # Remove any Libtool call
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
- # X makedepend
- shift
- cleared=no
- for arg in "$@"; do
- case $cleared in
- no)
- set ""; shift
- cleared=yes ;;
- esac
- case "$arg" in
- -D*|-I*)
- set fnord "$@" "$arg"; shift ;;
- # Strip any option that makedepend may not understand. Remove
- # the object too, otherwise makedepend will parse it as a source file.
- -*|$object)
- ;;
- *)
- set fnord "$@" "$arg"; shift ;;
- esac
- done
- obj_suffix="`echo $object | sed 's/^.*\././'`"
- touch "$tmpdepfile"
- ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- sed '1,2d' "$tmpdepfile" | tr ' ' '
-' | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile" "$tmpdepfile".bak
- ;;
-
-cpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- "$@" -E |
- sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
- sed '$ s: \\$::' > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- cat < "$tmpdepfile" >> "$depfile"
- sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-msvisualcpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- "$@" || exit $?
- IFS=" "
- for arg
- do
- case "$arg" in
- "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
- set fnord "$@"
- shift
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift
- shift
- ;;
- esac
- done
- "$@" -E |
- sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
- echo " " >> "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-none)
- exec "$@"
- ;;
-
-*)
- echo "Unknown depmode $depmode" 1>&2
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local Variables:
-# mode: shell-script
-# sh-indentation: 2
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/install-sh b/irstlm/install-sh
deleted file mode 100755
index dd97db7aa..000000000
--- a/irstlm/install-sh
+++ /dev/null
@@ -1,322 +0,0 @@
-#!/bin/sh
-# install - install a program, script, or datafile
-
-scriptversion=2004-09-10.20
-
-# This originates from X11R5 (mit/util/scripts/install.sh), which was
-# later released in X11R6 (xc/config/util/install.sh) with the
-# following copyright and license.
-#
-# Copyright (C) 1994 X Consortium
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
-# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# Except as contained in this notice, the name of the X Consortium shall not
-# be used in advertising or otherwise to promote the sale, use or other deal-
-# ings in this Software without prior written authorization from the X Consor-
-# tium.
-#
-#
-# FSF changes to this file are in the public domain.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch. It can only install one file at a time, a restriction
-# shared with many OS's install programs.
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit="${DOITPROG-}"
-
-# put in absolute paths if you don't have them in your path; or use env. vars.
-
-mvprog="${MVPROG-mv}"
-cpprog="${CPPROG-cp}"
-chmodprog="${CHMODPROG-chmod}"
-chownprog="${CHOWNPROG-chown}"
-chgrpprog="${CHGRPPROG-chgrp}"
-stripprog="${STRIPPROG-strip}"
-rmprog="${RMPROG-rm}"
-mkdirprog="${MKDIRPROG-mkdir}"
-
-chmodcmd="$chmodprog 0755"
-chowncmd=
-chgrpcmd=
-stripcmd=
-rmcmd="$rmprog -f"
-mvcmd="$mvprog"
-src=
-dst=
-dir_arg=
-dstarg=
-no_target_directory=
-
-usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
- or: $0 [OPTION]... SRCFILES... DIRECTORY
- or: $0 [OPTION]... -t DIRECTORY SRCFILES...
- or: $0 [OPTION]... -d DIRECTORIES...
-
-In the 1st form, copy SRCFILE to DSTFILE.
-In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
-In the 4th, create DIRECTORIES.
-
-Options:
--c (ignored)
--d create directories instead of installing files.
--g GROUP $chgrpprog installed files to GROUP.
--m MODE $chmodprog installed files to MODE.
--o USER $chownprog installed files to USER.
--s $stripprog installed files.
--t DIRECTORY install into DIRECTORY.
--T report an error if DSTFILE is a directory.
---help display this help and exit.
---version display version info and exit.
-
-Environment variables override the default commands:
- CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
-"
-
-while test -n "$1"; do
- case $1 in
- -c) shift
- continue;;
-
- -d) dir_arg=true
- shift
- continue;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift
- shift
- continue;;
-
- --help) echo "$usage"; exit 0;;
-
- -m) chmodcmd="$chmodprog $2"
- shift
- shift
- continue;;
-
- -o) chowncmd="$chownprog $2"
- shift
- shift
- continue;;
-
- -s) stripcmd=$stripprog
- shift
- continue;;
-
- -t) dstarg=$2
- shift
- shift
- continue;;
-
- -T) no_target_directory=true
- shift
- continue;;
-
- --version) echo "$0 $scriptversion"; exit 0;;
-
- *) # When -d is used, all remaining arguments are directories to create.
- # When -t is used, the destination is already specified.
- test -n "$dir_arg$dstarg" && break
- # Otherwise, the last argument is the destination. Remove it from $@.
- for arg
- do
- if test -n "$dstarg"; then
- # $@ is not empty: it contains at least $arg.
- set fnord "$@" "$dstarg"
- shift # fnord
- fi
- shift # arg
- dstarg=$arg
- done
- break;;
- esac
-done
-
-if test -z "$1"; then
- if test -z "$dir_arg"; then
- echo "$0: no input file specified." >&2
- exit 1
- fi
- # It's OK to call `install-sh -d' without argument.
- # This can happen when creating conditional directories.
- exit 0
-fi
-
-for src
-do
- # Protect names starting with `-'.
- case $src in
- -*) src=./$src ;;
- esac
-
- if test -n "$dir_arg"; then
- dst=$src
- src=
-
- if test -d "$dst"; then
- mkdircmd=:
- chmodcmd=
- else
- mkdircmd=$mkdirprog
- fi
- else
- # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
- # might cause directories to be created, which would be especially bad
- # if $src (and thus $dsttmp) contains '*'.
- if test ! -f "$src" && test ! -d "$src"; then
- echo "$0: $src does not exist." >&2
- exit 1
- fi
-
- if test -z "$dstarg"; then
- echo "$0: no destination specified." >&2
- exit 1
- fi
-
- dst=$dstarg
- # Protect names starting with `-'.
- case $dst in
- -*) dst=./$dst ;;
- esac
-
- # If destination is a directory, append the input filename; won't work
- # if double slashes aren't ignored.
- if test -d "$dst"; then
- if test -n "$no_target_directory"; then
- echo "$0: $dstarg: Is a directory" >&2
- exit 1
- fi
- dst=$dst/`basename "$src"`
- fi
- fi
-
- # This sed command emulates the dirname command.
- dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
-
- # Make sure that the destination directory exists.
-
- # Skip lots of stat calls in the usual case.
- if test ! -d "$dstdir"; then
- defaultIFS='
- '
- IFS="${IFS-$defaultIFS}"
-
- oIFS=$IFS
- # Some sh's can't handle IFS=/ for some reason.
- IFS='%'
- set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
- IFS=$oIFS
-
- pathcomp=
-
- while test $# -ne 0 ; do
- pathcomp=$pathcomp$1
- shift
- if test ! -d "$pathcomp"; then
- $mkdirprog "$pathcomp"
- # mkdir can fail with a `File exist' error in case several
- # install-sh are creating the directory concurrently. This
- # is OK.
- test -d "$pathcomp" || exit
- fi
- pathcomp=$pathcomp/
- done
- fi
-
- if test -n "$dir_arg"; then
- $doit $mkdircmd "$dst" \
- && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
- && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
- && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
- && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
-
- else
- dstfile=`basename "$dst"`
-
- # Make a couple of temp file names in the proper directory.
- dsttmp=$dstdir/_inst.$$_
- rmtmp=$dstdir/_rm.$$_
-
- # Trap to clean up those temp files at exit.
- trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
- trap '(exit $?); exit' 1 2 13 15
-
- # Copy the file name to the temp name.
- $doit $cpprog "$src" "$dsttmp" &&
-
- # and set any options; do chmod last to preserve setuid bits.
- #
- # If any of these fail, we abort the whole thing. If we want to
- # ignore errors from any of these, just make sure not to ignore
- # errors from the above "$doit $cpprog $src $dsttmp" command.
- #
- { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
- && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
- && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
- && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
-
- # Now rename the file to the real destination.
- { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
- || {
- # The rename failed, perhaps because mv can't rename something else
- # to itself, or perhaps because mv is so ancient that it does not
- # support -f.
-
- # Now remove or move aside any old file at destination location.
- # We try this two ways since rm can't unlink itself on some
- # systems and the destination file might be busy for other
- # reasons. In this case, the final cleanup might fail but the new
- # file should still install successfully.
- {
- if test -f "$dstdir/$dstfile"; then
- $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
- || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
- || {
- echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
- (exit 1); exit
- }
- else
- :
- fi
- } &&
-
- # Now rename the file to the real destination.
- $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
- }
- }
- fi || { (exit 1); exit; }
-done
-
-# The final little trick to "correctly" pass the exit status to the exit trap.
-{
- (exit 0); exit
-}
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/irstlm.vcproj b/irstlm/irstlm.vcproj
deleted file mode 100644
index 23599644e..000000000
--- a/irstlm/irstlm.vcproj
+++ /dev/null
@@ -1,255 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="8.00"
- Name="irstlm"
- ProjectGUID="{19C023D8-67DE-4609-9C89-3152EF95995D}"
- RootNamespace="irstlm"
- Keyword="ManagedCProj"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- <AssemblyReference
- RelativePath="System.dll"
- AssemblyName="System, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- <AssemblyReference
- RelativePath="System.Data.dll"
- AssemblyName="System.Data, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=x86"
- />
- <AssemblyReference
- RelativePath="System.XML.dll"
- AssemblyName="System.Xml, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
- UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
- >
- <File
- RelativePath=".\src\cmd.c"
- >
- </File>
- <File
- RelativePath=".\src\compile-lm.cpp"
- >
- <FileConfiguration
- Name="Debug|Win32"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCLCompilerTool"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release|Win32"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCLCompilerTool"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\src\dictionary.cpp"
- >
- </File>
- <File
- RelativePath=".\src\htable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\mempool.cpp"
- >
- </File>
- <File
- RelativePath=".\src\n_gram.cpp"
- >
- </File>
- <File
- RelativePath=".\src\ngramcache.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl;inc;xsd"
- UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
- >
- <File
- RelativePath=".\src\cmd.h"
- >
- </File>
- <File
- RelativePath=".\src\dictionary.h"
- >
- </File>
- <File
- RelativePath=".\src\htable.h"
- >
- </File>
- <File
- RelativePath=".\src\index.h"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.h"
- >
- </File>
- <File
- RelativePath=".\src\mempool.h"
- >
- </File>
- <File
- RelativePath=".\src\n_gram.h"
- >
- </File>
- <File
- RelativePath=".\src\ngramcache.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/irstlm/missing b/irstlm/missing
deleted file mode 100755
index 64b5f901d..000000000
--- a/irstlm/missing
+++ /dev/null
@@ -1,353 +0,0 @@
-#! /bin/sh
-# Common stub for a few missing GNU programs while installing.
-
-scriptversion=2004-09-07.08
-
-# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-# Originally by François Pinard <pinard@iro.umontreal.ca>, 1996.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-if test $# -eq 0; then
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
-fi
-
-run=:
-
-# In the cases where this matters, `missing' is being run in the
-# srcdir already.
-if test -f configure.ac; then
- configure_ac=configure.ac
-else
- configure_ac=configure.in
-fi
-
-msg="missing on your system"
-
-case "$1" in
---run)
- # Try to run requested program, and just exit if it succeeds.
- run=
- shift
- "$@" && exit 0
- # Exit code 63 means version mismatch. This often happens
-  # when the user tries to use an ancient version of a tool on
- # a file that requires a minimum version. In this case we
-  # should proceed as if the program had been absent, or
- # if --run hadn't been passed.
- if test $? = 63; then
- run=:
- msg="probably too old"
- fi
- ;;
-
- -h|--h|--he|--hel|--help)
- echo "\
-$0 [OPTION]... PROGRAM [ARGUMENT]...
-
-Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
-error status if there is no known handling for PROGRAM.
-
-Options:
- -h, --help display this help and exit
- -v, --version output version information and exit
- --run try to run the given command, and emulate it if it fails
-
-Supported PROGRAM values:
- aclocal touch file \`aclocal.m4'
- autoconf touch file \`configure'
- autoheader touch file \`config.h.in'
- automake touch all \`Makefile.in' files
- bison create \`y.tab.[ch]', if possible, from existing .[ch]
- flex create \`lex.yy.c', if possible, from existing .c
- help2man touch the output file
- lex create \`lex.yy.c', if possible, from existing .c
- makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
- yacc create \`y.tab.[ch]', if possible, from existing .[ch]
-
-Send bug reports to <bug-automake@gnu.org>."
- exit 0
- ;;
-
- -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
- echo "missing $scriptversion (GNU Automake)"
- exit 0
- ;;
-
- -*)
- echo 1>&2 "$0: Unknown \`$1' option"
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
- ;;
-
-esac
-
-# Now exit if we have it, but it failed. Also exit now if we
-# don't have it and --version was passed (most likely to detect
-# the program).
-case "$1" in
- lex|yacc)
- # Not GNU programs, they don't have --version.
- ;;
-
- tar)
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- exit 1
- fi
- ;;
-
- *)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- # Could not run --version or --help. This is probably someone
- # running `$TOOL --version' or `$TOOL --help' to check whether
- # $TOOL exists and not knowing $TOOL uses missing.
- exit 1
- fi
- ;;
-esac
-
-# If it does not exist, or fails to run (possibly an outdated version),
-# try to emulate it.
-case "$1" in
- aclocal*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acinclude.m4' or \`${configure_ac}'. You might want
- to install the \`Automake' and \`Perl' packages. Grab them from
- any GNU archive site."
- touch aclocal.m4
- ;;
-
- autoconf)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`${configure_ac}'. You might want to install the
- \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
- archive site."
- touch configure
- ;;
-
- autoheader)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acconfig.h' or \`${configure_ac}'. You might want
- to install the \`Autoconf' and \`GNU m4' packages. Grab them
- from any GNU archive site."
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
- test -z "$files" && files="config.h"
- touch_files=
- for f in $files; do
- case "$f" in
- *:*) touch_files="$touch_files "`echo "$f" |
- sed -e 's/^[^:]*://' -e 's/:.*//'`;;
- *) touch_files="$touch_files $f.in";;
- esac
- done
- touch $touch_files
- ;;
-
- automake*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
- You might want to install the \`Automake' and \`Perl' packages.
- Grab them from any GNU archive site."
- find . -type f -name Makefile.am -print |
- sed 's/\.am$/.in/' |
- while read f; do touch "$f"; done
- ;;
-
- autom4te)
- echo 1>&2 "\
-WARNING: \`$1' is needed, but is $msg.
- You might have modified some files without having the
- proper tools for further handling them.
- You can get \`$1' as part of \`Autoconf' from any GNU
- archive site."
-
- file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
- test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo "#! /bin/sh"
- echo "# Created by GNU Automake missing as a replacement of"
- echo "# $ $@"
- echo "exit 0"
- chmod +x $file
- exit 1
- fi
- ;;
-
- bison|yacc)
- echo 1>&2 "\
-WARNING: \`$1' $msg. You should only need it if
- you modified a \`.y' file. You may need the \`Bison' package
- in order for those modifications to take effect. You can get
- \`Bison' from any GNU archive site."
- rm -f y.tab.c y.tab.h
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.y)
- SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.c
- fi
- SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.h
- fi
- ;;
- esac
- fi
- if [ ! -f y.tab.h ]; then
- echo >y.tab.h
- fi
- if [ ! -f y.tab.c ]; then
- echo 'main() { return 0; }' >y.tab.c
- fi
- ;;
-
- lex|flex)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.l' file. You may need the \`Flex' package
- in order for those modifications to take effect. You can get
- \`Flex' from any GNU archive site."
- rm -f lex.yy.c
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.l)
- SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" lex.yy.c
- fi
- ;;
- esac
- fi
- if [ ! -f lex.yy.c ]; then
- echo 'main() { return 0; }' >lex.yy.c
- fi
- ;;
-
- help2man)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a dependency of a manual page. You may need the
- \`Help2man' package in order for those modifications to take
- effect. You can get \`Help2man' from any GNU archive site."
-
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
- fi
- if [ -f "$file" ]; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo ".ab help2man is required to generate this page"
- exit 1
- fi
- ;;
-
- makeinfo)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.texi' or \`.texinfo' file, or any other file
- indirectly affecting the aspect of the manual. The spurious
- call might also be the consequence of using a buggy \`make' (AIX,
- DU, IRIX). You might want to install the \`Texinfo' package or
- the \`GNU make' package. Grab either from any GNU archive site."
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
- file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
- fi
- touch $file
- ;;
-
- tar)
- shift
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case "$firstarg" in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case "$firstarg" in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
- *)
- echo 1>&2 "\
-WARNING: \`$1' is needed, and is $msg.
- You might have modified some files without having the
- proper tools for further handling them. Check the \`README' file,
- it often tells you about the needed prerequisites for installing
- this package. You may also peek at any GNU archive site, in case
- some other package would contain this missing \`$1' program."
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am
deleted file mode 100644
index 0cc7935f9..000000000
--- a/irstlm/src/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-lib_LIBRARIES = libirstlm.a
-
-libirstlm_a_SOURCES = \
- dictionary.cpp \
- htable.cpp \
- lmtable.cpp \
- mempool.cpp \
-	n_gram.cpp
-
-library_includedir=$(includedir)
-library_include_HEADERS = dictionary.h lmtable.h n_gram.h
-
-bin_PROGRAMS = compile-lm quantize-lm
-
-AM_LDFLAGS=-L .
-LIBS=-lirstlm
-
-compile_lm_SOURCES = compile-lm.cpp
-quantize_lm_SOURCES = quantize-lm.cpp
-
diff --git a/irstlm/src/cmd.c b/irstlm/src/cmd.c
deleted file mode 100644
index aeb36d7b9..000000000
--- a/irstlm/src/cmd.c
+++ /dev/null
@@ -1,661 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "cmd.h"
-
-static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
-};
-
-#ifdef NEEDSTRDUP
-char *strdup();
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
-#define LINSIZ 10240
-#define MAXPARAM 256
-
-static char *GetLine(),
- **str2array();
-static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
-
-static Cmd_T cmds[MAXPARAM+1];
-static char *SepString = " \t\n";
-
-#if defined(__STDC__)
-#include <stdarg.h>
-int DeclareParams(char *ParName, ...)
-#else
-#include <varargs.h>
-int DeclareParams(ParName, va_alist)
-char *ParName;
-va_dcl
-#endif
-{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
-#if defined(__STDC__)
- va_start(args, ParName);
-#else
- va_start(args);
-#endif
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
-}
-
-int GetParams(n, a, CmdFileName)
-int *n;
-char ***a;
-char *CmdFileName;
-{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
-
-#ifdef MSDOS
-#define PATHSEP '\\'
- char *dot = NULL;
-#else
-#define PATHSEP '/'
-#endif
-
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
-#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
-#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
-#ifdef MSDOS
- if(dot) *dot = '.';
-#endif
- free(Line);
- return 0;
-}
-
-int PrintParams(ValFlag, fp)
-int ValFlag;
-FILE *fp;
-{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
-}
-
-int SPrintParams(a, pfx)
-char ***a,
- *pfx;
-{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
-}
-
-static int CmdError(opt)
-char *opt;
-{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
-	fprintf(stderr, "This program expects the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
-}
-
-static int PrintParam(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
-}
-
-static char *GetLine(fp, n, Line)
-FILE *fp;
-int n;
-char *Line;
-{
- int j,
- l,
- offs=0;
-
- for(;;) {
- if(!fgets(Line+offs, n-offs, fp)) {
- return NULL;
- }
- if(Line[offs]=='#') continue;
- l = strlen(Line+offs)-1;
- Line[offs+l] = 0;
- for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
- ;
- if(l<1) continue;
- if(j > offs) {
- char *s = Line+offs,
- *q = Line+j;
-
- while((*s++=*q++))
- ;
- }
- if(Line[offs+l-1]=='\\') {
- offs += l;
- Line[offs-1] = ' ';
- } else {
- break;
- }
- }
- return Line;
-}
-
-static int Scan(ProgName, cmds, Line)
-char *ProgName,
- *Line;
-Cmd_T *cmds;
-{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
-}
-
-static int SetParam(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
-}
-
-static int SetEnum(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
-}
-
-static int SetSubrange(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetGte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetStrArray(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
-}
-
-static int SetLte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int EnumError(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
-}
-
-static int GteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int LteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int SubrangeError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
-}
-
-static int PrintEnum(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
-}
-
-static int PrintStrArray(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
-}
-
-static char **str2array(s, sep)
-char *s,
- *sep;
-{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
-}
diff --git a/irstlm/src/cmd.h b/irstlm/src/cmd.h
deleted file mode 100644
index 708905f6f..000000000
--- a/irstlm/src/cmd.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#if !defined(CMD_H)
-
-#define CMD_H
-
-#define CMDDOUBLETYPE 1
-#define CMDENUMTYPE 2
-#define CMDINTTYPE 3
-#define CMDSTRINGTYPE 4
-#define CMDSUBRANGETYPE 5
-#define CMDGTETYPE 6
-#define CMDLTETYPE 7
-#define CMDSTRARRAYTYPE 8
-#define CMDBOOLTYPE 9
-
-typedef struct {
- char *Name;
- int Idx;
-} Enum_T;
-
-typedef struct {
- int Type;
- char *Name,
- *ArgStr;
- void *Val,
- *p;
-} Cmd_T;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__STDC__)
-int DeclareParams(char *, ...);
-#else
-int DeclareParams();
-#endif
-
-int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
-
-#ifdef __cplusplus
-}
-#endif
-#endif
-
-
-
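A minimal usage sketch of this option API, assuming it is called from C++ exactly as declared above; the parameter names (Order, InputFile, Weight) are invented for illustration. Each parameter is registered once with DeclareParams() -- name, type code, address of the target variable, plus type-specific extras -- and the list is terminated by a NULL name. GetParams() then fills the variables from -Name=value arguments, or from a command file passed as -=file whose lines have the form "name value" ('#' starts a comment, a trailing backslash continues a line, and "\End" stops reading).

    // Hypothetical driver (assumes __STDC__ is defined, so the variadic
    // prototype of DeclareParams is the one visible from C++).
    #include <cstdio>
    #include "cmd.h"

    int main(int argc, char **argv)
    {
        int    order     = 3;      // filled by -Order=...
        char  *inputfile = NULL;   // filled by -InputFile=...
        double weight    = 0.5;    // filled by -Weight=...

        DeclareParams((char *)"Order",     CMDINTTYPE,    &order,
                      (char *)"InputFile", CMDSTRINGTYPE, &inputfile,
                      (char *)"Weight",    CMDDOUBLETYPE, &weight,
                      (char *)NULL);      // NULL name terminates the list

        // Consumes the recognised -Name=value options; whatever is left
        // over (plain file arguments) remains in argc/argv.
        GetParams(&argc, &argv, NULL);

        std::printf("order=%d weight=%g input=%s\n",
                    order, weight, inputfile ? inputfile : "(none)");
        return 0;
    }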
diff --git a/irstlm/src/compile-lm.cpp b/irstlm/src/compile-lm.cpp
deleted file mode 100644
index ca9e412c4..000000000
--- a/irstlm/src/compile-lm.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit, compile LM
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-using namespace std;
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-
-#include "math.h"
-#include "lmtable.h"
-
-
-/* GLOBAL OPTIONS ***************/
-
-std::string stxt = "no";
-std::string seval = "";
-std::string sdebug = "0";
-
-/********************************/
-
-void usage(const char *msg = 0) {
- if (msg) { std::cerr << msg << std::endl; }
- std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl;
- if (!msg) std::cerr << std::endl
- << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl
- << " a compiled representation that the IRST LM toolkit can quickly" << std::endl
- << " read and process." << std::endl << std::endl;
- std::cerr << "Options:\n"
- << "--text=[yes|no] -t=[yes|no] (output is again in text format)\n"
- << "--eval=text-file -e=text-file (computes perplexity of text-file and returns)\n"
- << "--debug=1 -d=1 (verbose output for --eval option)\n";
-}
-
-bool starts_with(const std::string &s, const std::string &pre) {
- if (pre.size() > s.size()) return false;
-
- if (pre == s) return true;
- std::string pre_equals(pre+'=');
- if (pre_equals.size() > s.size()) return false;
- return (s.substr(0,pre_equals.size()) == pre_equals);
-}
-
-std::string get_param(const std::string& opt, int argc, const char **argv, int& argi)
-{
- std::string::size_type equals = opt.find_first_of('=');
- if (equals != std::string::npos && equals < opt.size()-1) {
- return opt.substr(equals+1);
- }
- std::string nexto;
- if (argi + 1 < argc) {
- nexto = argv[++argi];
- } else {
- usage((opt + " requires a value!").c_str());
- exit(1);
- }
- return nexto;
-}
-
-void handle_option(const std::string& opt, int argc, const char **argv, int& argi)
-{
- if (opt == "--help" || opt == "-h") { usage(); exit(1); }
- if (starts_with(opt, "--text") || starts_with(opt, "-t"))
- stxt = get_param(opt, argc, argv, argi);
- else
- if (starts_with(opt, "--eval") || starts_with(opt, "-e"))
- seval = get_param(opt, argc, argv, argi);
- else
- if (starts_with(opt, "--debug") || starts_with(opt, "-d"))
- sdebug = get_param(opt, argc, argv, argi);
-
- else {
- usage(("Don't understand option " + opt).c_str());
- exit(1);
- }
-}
-
-int main(int argc, const char **argv)
-{
-
- if (argc < 2) { usage(); exit(1); }
- std::vector<std::string> files;
- for (int i=1; i < argc; i++) {
- std::string opt = argv[i];
- if (opt[0] == '-') { handle_option(opt, argc, argv, i); }
- else files.push_back(opt);
- }
- if (files.size() > 2) { usage("Too many arguments"); exit(1); }
- if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
-
- bool textoutput = (stxt == "yes"? true : false);
- int debug = atoi(sdebug.c_str());
-
- std::string infile = files[0];
- if (files.size() == 1) {
- std::string::size_type p = infile.rfind('/');
- if (p != std::string::npos && ((p+1) < infile.size())) {
- files.push_back(infile.substr(p+1) + (textoutput?".lm":".blm"));
- } else {
- files.push_back(infile + (textoutput?".lm":".blm"));
- }
- }
-
- std::string outfile = files[1];
- std::cout << "Reading " << infile << "..." << std::endl;
-
- std::fstream inp(infile.c_str());
- if (!inp.good()) {
- std::cerr << "Failed to open " << infile << "!\n";
- exit(1);
- }
- lmtable lmt;
- lmt.load(inp);
-
-
- if (seval != ""){
- ngram ng(lmt.dict);
- std::cout.setf(ios::fixed);
- std::cout.precision(2);
- if (debug>1) std::cout.precision(8);
- std::fstream inptxt(seval.c_str(),std::ios::in);
-
- int Nbo=0,Nw=0,Noov=0;
- double logPr=0,PP=0,PPwp=0,Pr;
-
- int bos=ng.dict->encode(ng.dict->BoS());
-
-#ifdef TRACE_CACHE
- lmt.init_probcache();
-#endif
-
- while(inptxt >> ng){
-
- if (ng.size>lmt.maxlevel()) ng.size=lmt.maxlevel();
-
- // reset ngram at begin of sentence
- if (*ng.wordp(1)==bos) continue;
-
- lmt.bo_state(0);
- if (ng.size>=1){
- logPr+=(Pr=lmt.clprob(ng));
- if (debug>1)
- std::cout << ng << "[" << ng.size << "-gram]" << " " << Pr << "\n";
-
- if (*ng.wordp(1) == lmt.dict->oovcode()) Noov++;
- Nw++; if (lmt.bo_state()) Nbo++;
- }
-
- }
-
- PP=exp((-logPr * log(10.0)) /Nw);
- PPwp= PP * exp(Noov * log(10000000.0-lmt.dict->size())/Nw);
-
- std::cout << "%% Nw=" << Nw << " PP=" << PP << " PPwp=" << PPwp
- << " Nbo=" << Nbo << " Noov=" << Noov
- << " OOV=" << (float)Noov/Nw * 100.0 << "%\n";
-
- return 0;
- };
-
- std::cout << "Saving to " << outfile << std::endl;
- if (textoutput)
- lmt.savetxt(outfile.c_str());
- else
- lmt.savebin(outfile.c_str());
-
- return 0;
-}
-
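The evaluation branch above accumulates the log10 probabilities returned by lmtable::clprob (the ln 10 factor in the code only makes sense if clprob is log10-based) and reports perplexity as follows, with N_w the number of scored words, N_oov the number of OOV words, |D| the dictionary size, and 10^7 the hard-coded vocabulary upper bound used to penalise OOV words:

    PP      = \exp\left( -\frac{\ln 10}{N_w} \sum_{i=1}^{N_w} \log_{10} p(w_i \mid h_i) \right)
            = 10^{-\frac{1}{N_w} \sum_{i=1}^{N_w} \log_{10} p(w_i \mid h_i)}

    PP_{wp} = PP \cdot \exp\left( \frac{N_{oov} \, \ln\left(10^{7} - |D|\right)}{N_w} \right)

So PP is the usual corpus perplexity, while PP_{wp} appears to additionally divide the probability of every OOV word by the (10^7 - |D|) word types assumed to be missing from the dictionary.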
diff --git a/irstlm/src/dictionary.cpp b/irstlm/src/dictionary.cpp
deleted file mode 100644
index ab53116a2..000000000
--- a/irstlm/src/dictionary.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include "mempool.h"
-#include "htable.h"
-#include "dictionary.h"
-#include "index.h"
-
-using namespace std;
-
-dictionary::dictionary(char *filename,int size,char* isymb,char* oovlexfile){
-
-  // optional out-of-vocabulary lexicon, loaded from its own file
- if (oovlexfile!=NULL)
- oovlex=new dictionary(oovlexfile,size,isymb,NULL);
- else
- oovlex=(dictionary *)NULL;
-
- htb = new htable(size/LOAD_FACTOR);
- tb = new dict_entry[size];
- st = new strstack(size * 10);
-
- for (int i=0;i<size;i++) tb[i].freq=0;
-
- is=(char*) NULL;
- intsymb(isymb);
-
- oov_code = -1;
- in_oov_lex=0;
- n = 0;
- N = 0;
- dubv = 0;
- lim = size;
- ifl=0; //increment flag
-
- if (filename==NULL) return;
-
- std::ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "cannot open " << filename << "\n";
- exit(1);
- }
-
- char buffer[100];
-
- inp >> setw(100) >> buffer;
-
- inp.close();
-
- if ((strncmp(buffer,"dict",4)==0) ||
- (strncmp(buffer,"DICT",4)==0))
- load(filename);
- else
- generate(filename);
-
- cerr << "loaded \n";
-
-
-}
-
-
-
-void dictionary::generate(char *filename){
-
- char buffer[MAX_WORD];
- int k;
-
- ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "cannot open " << filename << "\n";
- exit(1);
- }
-
- cerr << "dict:";
-
- ifl=1; k=0;
- while (inp >> setw(MAX_WORD) >> buffer){
-
- if (strlen(buffer)==(MAX_WORD-1)){
-      cerr << "dictionary: an overly long word was read ("
- << buffer << ")\n";
- };
-
-
- if (strlen(buffer)==0){
-      cerr << "zero length word!\n";
- continue;
- }
-
- //if (is && (strlen(buffer)==1) && !index(is,buffer[0]))
- if (is && (strlen(buffer)==1) && (index(is,buffer[0])!=NULL))
- continue; //skip over the interruption symbol
-
- incfreq(encode(buffer),1);
-
- if (!(++k % 1000000)) cerr << ".";
- }
- ifl=0;
- cerr << "\n";
-
- inp.close();
-
-}
-
-void dictionary::load(char* filename){
- char header[100];
- char buffer[MAX_WORD];
- char *addr;
- int freqflag=0;
-
- ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "\ncannot open " << filename << "\n";
- exit(1);
- }
-
- cerr << "dict:";
-
- inp.getline(header,100);
- if (strncmp(header,"DICT",4)==0)
- freqflag=1;
- else
- if (strncmp(header,"dict",4)!=0){
- cerr << "\ndictionary file " << filename << " has a wrong header\n";
- exit(1);
- }
-
-
- while (inp >> setw(MAX_WORD) >> buffer){
-
- if (strlen(buffer)==(MAX_WORD-1)){
-      cerr << "\ndictionary: an overly long word was read ("
- << buffer << ")\n";
- };
-
- tb[n].word=st->push(buffer);
- tb[n].code=n;
-
- if (freqflag)
- inp >> tb[n].freq;
- else
- tb[n].freq=0;
-
- if ((addr=htb->search((char *)&tb[n].word,HT_ENTER)))
- if (addr!=(char *)&tb[n].word){
- cerr << "dictionary::loadtxt wrong entry was found ("
- << buffer << ") in position " << n << "\n";
- exit(1);
- }
-
- N+=tb[n].freq;
-
- if (strcmp(buffer,OOV())==0) oov_code=n;
-
- if (++n==lim) grow();
-
- }
-
- inp.close();
-}
-
-
-void dictionary::load(std::istream& inp){
-
- char buffer[MAX_WORD];
- char *addr;
- int size;
-
- inp >> size;
-
- for (int i=0;i<size;i++){
-
- inp >> buffer;
-
- tb[n].word=st->push(buffer);
- tb[n].code=n;
- inp >> tb[n].freq;
- N+=tb[n].freq;
-
- if ((addr=htb->search((char *)&tb[n].word,HT_ENTER)))
- if (addr!=(char *)&tb[n].word){
- cerr << "dictionary::loadtxt wrong entry was found ("
- << buffer << ") in position " << n << "\n";
- exit(1);
- }
-
- if (strcmp(tb[n].word,OOV())==0)
- oov_code=n;
-
- if (++n==lim) grow();
- }
- inp.getline(buffer,MAX_WORD-1);
-}
-
-void dictionary::save(std::ostream& out){
- out << n << "\n";
- for (int i=0;i<n;i++)
- out << tb[i].word << " " << tb[i].freq << "\n";
-}
-
-
-int cmpdictentry(const void *a,const void *b){
- dict_entry *ae=(dict_entry *)a;
- dict_entry *be=(dict_entry *)b;
- return be->freq-ae->freq;
-}
-
-dictionary::dictionary(dictionary* d){
-
- //transfer values
-
- n=d->n; //total entries
- N=d->N; //total frequency
- lim=d->lim; //limit of entries
-  oov_code=-1;  //code of oov must be re-defined
- ifl=0; //increment flag=0;
- dubv=d->dubv; //dictionary upperbound transferred
- in_oov_lex=0; //does not copy oovlex;
-
-
- //creates a sorted copy of the table
-
- tb = new dict_entry[lim];
- htb = new htable(lim/LOAD_FACTOR);
- st = new strstack(lim * 10);
-
- for (int i=0;i<n;i++){
- tb[i].code=d->tb[i].code;
- tb[i].freq=d->tb[i].freq;
- tb[i].word=st->push(d->tb[i].word);
- }
-
- //sort all entries according to frequency
- cerr << "sorting dictionary ...";
- qsort(tb,n,sizeof(dict_entry),cmpdictentry);
- cerr << "done\n";
-
- for (int i=0;i<n;i++){
-
- //eventually re-assign oov code
- if (d->oov_code==tb[i].code) oov_code=i;
-    //re-assign the oov code if it was moved by the sort
- tb[i].code=i;
- htb->search((char *)&tb[i].word,HT_ENTER);
- };
-
-}
-
-
-
-dictionary::~dictionary(){
- delete htb;
- delete st;
- delete [] tb;
-}
-
-void dictionary::stat(){
- cout << "dictionary class statistics\n";
- cout << "size " << n
- << " used memory "
- << (lim * sizeof(int) +
- htb->used() +
- st->used())/1024 << " Kb\n";
-}
-
-void dictionary::grow(){
-
- delete htb;
-
- cerr << "+\b";
-
- dict_entry *tb2=new dict_entry[lim+GROWTH_STEP];
-
- memcpy(tb2,tb,sizeof(dict_entry) * lim );
-
- delete [] tb; tb=tb2;
-
- htb=new htable((lim+GROWTH_STEP)/LOAD_FACTOR);
-
- for (int i=0;i<lim;i++)
-
- htb->search((char *)&tb[i].word,HT_ENTER);
-
- for (int i=lim;i<lim+GROWTH_STEP;i++) tb[i].freq=0;
-
- lim+=GROWTH_STEP;
-
-
-}
-
-void dictionary::save(char *filename,int freqflag){
-
- std::ofstream out(filename,ios::out);
-
- if (!out){
- cerr << "cannot open " << filename << "\n";
- }
-
- // header
- if (freqflag)
- out << "DICTIONARY 0 " << n << "\n";
- else
- out << "dictionary 0 " << n << "\n";
-
- for (int i=0;i<n;i++){
- out << tb[i].word;
- if (freqflag)
- out << " " << tb[i].freq;
- out << "\n";
- }
-
- out.close();
-}
-
-
-int dictionary::getcode(const char *w){
- dict_entry* ptr=(dict_entry *)htb->search((char *)&w,HT_FIND);
- if (ptr==NULL) return -1;
- return ptr->code;
-}
-
-int dictionary::encode(const char *w){
-
- //case of strange characters
- if (strlen(w)==0){cerr << "0";w=OOV();}
-
- dict_entry* ptr;
-
- if ((ptr=(dict_entry *)htb->search((char *)&w,HT_FIND))!=NULL)
- return ptr->code;
- else{
- if (!ifl){ //do not extend dictionary
- if (oov_code==-1){ //did not use OOV yet
- cerr << "starting to use OOV words [" << w << "]\n";
- tb[n].word=st->push(OOV());
- htb->search((char *)&tb[n].word,HT_ENTER);
- tb[n].code=n;
- tb[n].freq=0;
- oov_code=n;
- if (++n==lim) grow();
- }
- //if there is an oov lexicon, check if this word belongs to
- dict_entry* oovptr;
- if (oovlex){
- if ((oovptr=(dict_entry *)oovlex->htb->search((char *)&w,HT_FIND))!=NULL){
- in_oov_lex=1;
- oov_lex_code=oovptr->code;
- }else
- in_oov_lex=0;
- }
- return encode(OOV());
- }
- else{ //extend dictionary
- tb[n].word=st->push((char *)w);
- htb->search((char *)&tb[n].word,HT_ENTER);
- tb[n].code=n;
- tb[n].freq=0;
- if (++n==lim) grow();
- return n-1;
- }
- }
-}
-
-
-char *dictionary::decode(int c){
- if (c>=0 && c < n)
- return tb[c].word;
- else{
- cerr << "decode: code out of boundary\n";
- return OOV();
- }
-}
-
-
-dictionary_iter::dictionary_iter(dictionary *dict) : m_dict(dict) {
- m_dict->htb->scan(HT_INIT);
-}
-
-dict_entry* dictionary_iter::next() {
- return (dict_entry*)m_dict->htb->scan(HT_CONT);
-}
-
-
-
-
-
-/*
-main(int argc,char **argv){
- dictionary d(argv[1],40000);
- d.stat();
- cout << "ROMA" << d.decode(0) << "\n";
- cout << "ROMA:" << d.encode("ROMA") << "\n";
- d.save(argv[2]);
-}
-*/
diff --git a/irstlm/src/dictionary.h b/irstlm/src/dictionary.h
deleted file mode 100644
index 2e15f22bb..000000000
--- a/irstlm/src/dictionary.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#ifndef MF_DICTIONARY_H
-#define MF_DICTIONARY_H
-
-#include <cstring>
-#include <iostream>
-
-#define MAX_WORD 100
-#define LOAD_FACTOR 5
-
-#ifndef GROWTH_STEP
-#define GROWTH_STEP 100000
-#endif
-
-#ifndef DICT_INITSIZE
-#define DICT_INITSIZE 100000
-#endif
-
-//Begin of sentence symbol
-#ifndef BOS_
-#define BOS_ "<s>"
-#endif
-
-
-//End of sentence symbol
-#ifndef EOS_
-#define EOS_ "</s>"
-#endif
-
-//Out-Of-Vocabulary symbol
-#ifndef OOV_
-#define OOV_ "_unk_"
-#endif
-
-typedef struct{
- char *word;
- int code;
- int freq;
-}dict_entry;
-
-class strstack;
-class htable;
-
-class dictionary{
- strstack *st; //!< stack of strings
- dict_entry *tb; //!< entry table
- htable *htb; //!< hash table
- int n; //!< number of entries
- int N; //!< total frequency
- int lim; //!< limit of entries
- int oov_code; //!< code assigned to oov words
- char* is; //!< interruption symbol list
- char ifl; //!< increment flag
- int dubv; //!< dictionary size upper bound
- int in_oov_lex; //!< flag
-  int   oov_lex_code; //!< code of the current word in the oov lexicon
- char* oov_str; //!< oov string
-
- public:
-
- friend class dictionary_iter;
-
-  dictionary* oovlex; //!< additional dictionary
-
- inline int dub(){return dubv;}
- inline int dub(int value){return (dubv=value);}
-
- inline char *OOV(){return (OOV_);}
- inline char *BoS(){return (BOS_);}
- inline char *EoS(){return (EOS_);}
-
- inline int oovcode(int v=-1){return oov_code=(v>=0?v:oov_code);}
-
- inline char *intsymb(char* isymb=NULL){
- if (isymb==NULL) return is;
- if (is!=NULL) delete [] is;
-    is=new char[strlen(isymb)+1]; //allocate strlen(isymb)+1 bytes, not strlen(isymb+1)
-    strcpy(is,isymb);
-    return is; //return the private copy instead of aliasing the caller's buffer
- }
-
- inline int incflag(){return ifl;}
- inline int incflag(int v){return ifl=v;}
- inline int oovlexsize(){return oovlex?oovlex->n:0;}
- inline int inoovlex(){return in_oov_lex;}
- inline int oovlexcode(){return oov_lex_code;}
-
-
- int isprintable(char* w){
- char buffer[MAX_WORD];
- sprintf(buffer,"%s",w);
- return strcmp(w,buffer)==0;
- }
-
- inline void genoovcode(){
- int c=encode(OOV());
- std::cerr << "OOV code is "<< c << std::endl;
- oovcode(c);
- }
-
- inline dictionary* oovlexp(char *fname=NULL){
- if (fname==NULL) return oovlex;
- if (oovlex!=NULL) delete oovlex;
- oovlex=new dictionary(fname,DICT_INITSIZE);
- return oovlex;
- }
-
- inline int setoovrate(double oovrate){
- encode(OOV()); //be sure OOV code exists
- int oovfreq=(int)(oovrate * totfreq());
- std::cerr << "setting OOV rate to: " << oovrate << " -- freq= " << oovfreq << std::endl;
- return freq(oovcode(),oovfreq);
- }
-
-
- inline int incfreq(int code,int value){N+=value;return tb[code].freq+=value;}
-
- inline int multfreq(int code,double value){
- N+=(int)(value * tb[code].freq)-tb[code].freq;
- return tb[code].freq=(int)(value * tb[code].freq);
- }
-
- inline int freq(int code,int value=-1){
- if (value>=0){
- N+=value-tb[code].freq;
- tb[code].freq=value;
- }
- return tb[code].freq;
- }
-
- inline int totfreq(){return N;}
-
- void grow();
- //dictionary(int size=400,char* isym=NULL,char* oovlex=NULL);
- dictionary(char *filename=NULL,int size=DICT_INITSIZE,char* isymb=NULL,char* oovlex=NULL);
- dictionary(dictionary* d);
-
- ~dictionary();
- void generate(char *filename);
- void load(char *filename);
- void save(char *filename,int freqflag=0);
- void load(std::istream& fd);
- void save(std::ostream& fd);
-
- int size(){return n;};
- int getcode(const char *w);
- int encode(const char *w);
- char *decode(int c);
- void stat();
-
- void cleanfreq(){
- for (int i=0;i<n;tb[i++].freq=0);
- N=0;
- }
-
-};
-
-class dictionary_iter {
- public:
- dictionary_iter(dictionary *dict);
- dict_entry* next();
- private:
- dictionary* m_dict;
-};
-
-#endif
-
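A minimal usage sketch of the dictionary class declared above, mirroring the commented-out test driver at the end of dictionary.cpp; the file names and the probe word are placeholders:

    #include <iostream>
    #include "dictionary.h"

    int main(int argc, char **argv)
    {
        if (argc < 3) { std::cerr << "usage: dicttest <corpus-or-dict> <outfile>\n"; return 1; }

        // The constructor inspects the file header itself: "dict"/"DICT"
        // files are loaded, anything else is scanned as a text corpus.
        dictionary dict(argv[1]);

        dict.genoovcode();            // make sure the OOV entry exists
        dict.incflag(0);              // frozen: unknown words map to the OOV code

        int code = dict.encode("ROMA");
        std::cerr << "ROMA -> " << code << " -> " << dict.decode(code) << "\n";

        dict.stat();                  // size / memory statistics
        dict.save(argv[2], 1);        // freqflag=1 writes the "DICTIONARY" header with counts
        return 0;
    }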
diff --git a/irstlm/src/htable.cpp b/irstlm/src/htable.cpp
deleted file mode 100644
index 73dab4d10..000000000
--- a/irstlm/src/htable.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iostream>
-#include <cassert>
-#include "mempool.h"
-#include "htable.h"
-
-//bitwise rotation of unsigned integers
-
-#define rot_right(a,k) (((a) >> k ) | ((a) << (32-(k))))
-#define rot_left(a,k) (((a) << k ) | ((a) >> (32-(k))))
-
-using namespace std;
-
-htable::htable(int n,int kl,HTYPE ht,size_t (*klf)(const char* )){
-
- if (ht!=STRPTR && ht!=STR && kl==0){
- cerr << "htable: key length must be specified for non-string entries!";
- exit(1);
- }
-
- memory=new mempool( sizeof(entry) , BlockSize );
-
- table = new entry* [ size=n ];
-
- memset(table,0,sizeof(entry *) * n );
-
- keylen=(ht==INT || ht==INTPTR?kl/sizeof(int):kl);
-
- htype=ht;
-
- keys = accesses = collisions = 0;
-
- keylenfunc=(klf?klf:&strlen);
-
-}
-
-
-char *htable::search(char *item, HT_ACTION action)
-
-{
- address h;
- entry *q,**p;
- int i;
-
- //if (action == HT_FIND)
- accesses++;
-
- h = Hash(item);
-
- i=(h % size);
-
- //cout << "htable::search() hash i=" << i << "\n";
-
- p = &table[h % size];
-
- q=*p;
-
- /*
- ** Follow collision chain
- */
-
- while (q != NULL && Comp((char *)q->key,(char *)item))
- {
- p = (entry **)&q->next;
- q=*p;
- //if (action == HT_FIND)
- collisions++;
- }
-
- if (
- q != NULL /* found */
- ||
- action == HT_FIND /* not found, search only */
- ||
- (q = (entry *)memory->allocate())
- ==
- NULL /* not found, no room */
- )
-
- return((q!=NULL)?(char *)q->key:(char *)NULL);
-
- *p = q; /* link into chain */
- /*
- ** Initialize new element
- */
-
- q->key = item;
- q->next = NULL;
- keys++;
-
- return((char *)q->key);
-}
-
-
-char *htable::scan(HT_ACTION action){
-
- char *k;
-
- if (action == HT_INIT)
- {
- scan_i=0;scan_p=table[0];
- return NULL;
- }
-
- // if scan_p==NULL go to the first non null pointer
- while ((scan_p==NULL) && (++scan_i<size)) scan_p=table[scan_i];
-
- if (scan_p!=NULL)
- {
- k=scan_p->key;
- scan_p=(entry *)scan_p->next;
- return k;
- };
-
- return NULL;
-}
-
-
-void htable::map(ostream& co,int cols){
-
- entry *p;
- char* img=new char[cols+1];
-
- img[cols]='\0';
- memset(img,'.',cols);
-
- co << "htable memory map: . (0 items), - (<5), # (>5)\n";
-
- for (int i=0; i<size;i++)
- {
- int n=0;p=table[i];
-
- while(p!=NULL){
- n++;
- p=(entry *)p->next;
- };
-
- if (i && (i % cols)==0){
- co << img << "\n";
- memset(img,'.',cols);
- }
-
- if (n>0)
- img[i % cols]=n<=5?'-':'#';
-
- }
-
- img[size % cols]='\0';
- co << img << "\n";
-
- delete []img;
-}
-
-
-void htable::stat(){
- cerr << "htable class statistics\n";
- cerr << "size " << size
- << " keys " << keys
- << " acc " << accesses
- << " coll " << collisions
- << " used memory " << used()/1024 << "Kb\n";
-}
-
-htable::~htable()
-{
- delete [] table;
- delete memory;
-}
-
-address htable::HashStr(char *key)
-{
- char *Key=(htype==STRPTR? *(char **)key:key);
- int length=(keylen?keylen:keylenfunc(Key));
-
- //cerr << "hash: " << Key << " length:" << length << "\n";
-
- register address h=0;
- register int i;
-
- for (i=0,h=0;i<length;i++)
- h = h * Prime1 ^ (Key[i] - ' ');
- h %= Prime2;
-
- return h;
-}
-
-//Herbert Glarner's "HSH 11/13" hash function.
-/*
-address htable::HashInt(char *key){
-
-int *Key=(htype==INTPTR? *(int **)key:(int *)key);
-
-address state=12345,h=0;
-register int i,j;
-
-int p=7; //precision=8 * sizeof(int)-1, in general must be >=7
-
- for (i=0,h=0;i<keylen;i++){
- h = h ^ ((address) Key[i]);
- for (j=0;j<p;j++){
- state = rot_left(state,11); //state = state left-rotate 11 bits
- h = rot_left(h,13); //h = h left-rotate 13 bits
- h ^= state ; //h = h xor state
- h = rot_left(h,(state & (address)31)); //h = h left-rotate (state mod 32) bits
- h = rot_left(h, (h & (address)31)); //h = h left-rotate (h mod 32) bits
- }
- }
-
- return h;
-}
-
-*/
-
-address htable::HashInt(char *key)
-{
- int *Key=(htype==INTPTR? *(int **)key:(int *)key);
-
-
- address h;
- register int i;
-
- //Thomas Wang's 32 bit Mix Function
- for (i=0,h=0;i<keylen;i++){
- h+=Key[i];
- h += ~(h << 15);
- h ^= (h >> 10);
- h += (h << 3);
- h ^= (h >> 6);
- h += ~(h << 11);
- h ^= (h >> 16);
- };
-
- return h;
-}
-
-int htable::CompStr(char *key1, char *key2)
-{
- assert(key1 && key2);
-
- char *Key1=(htype==STRPTR?*(char **)key1:key1);
- char *Key2=(htype==STRPTR?*(char **)key2:key2);
-
- assert(Key1 && Key2);
-
- int length1=(keylen?keylen:keylenfunc(Key1));
- int length2=(keylen?keylen:keylenfunc(Key2));
-
- if (length1!=length2) return 1;
-
- register int i;
-
- for (i=0;i<length1;i++)
- if (Key1[i]!=Key2[i]) return 1;
- return 0;
-}
-
-int htable::CompInt(char *key1, char *key2)
-{
- assert(key1 && key2);
-
- int *Key1=(htype==INTPTR?*(int **)key1:(int*)key1);
- int *Key2=(htype==INTPTR?*(int **)key2:(int*)key2);
-
- assert(Key1 && Key2);
-
- register int i;
-
- for (i=0;i<keylen;i++)
- if (Key1[i]!=Key2[i]) return 1;
- return 0;
-}
-
-
-/*
-main(){
-
-const int n=1000;
-
-htable *ht=new htable(1000/5);
-
- char w[n][20];
- char *c;
-
- for (int i=0;i<n;i++)
- {
- sprintf(w[i],"ciao%d",i);
- ht->search((char *)&w[i],HT_ENTER);
- }
-
- for (int i=0;i<n;i++)
- if (ht->search((char *)&w[i],HT_FIND))
- cout << w[i] << " trovato\n" ;
- else
- cout << w[i] << " non trovato\n";
-
- ht->stat();
-
- delete ht;
- htable *ht2=new htable(n);
- for (int i=0;i<n;i++)
- ht2->search((char *)&w[i],HT_ENTER);
-
- ht2->scan(INIT);
- cout << "elenco:\n";
- while ((c=ht2->scan(CONT))!=NULL)
- cout << *(char **) c << "\n";
-
- ht2->map();
-}
-*/
diff --git a/irstlm/src/htable.h b/irstlm/src/htable.h
deleted file mode 100644
index 111340632..000000000
--- a/irstlm/src/htable.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#ifndef MF_HTABLE_H
-#define MF_HTABLE_H
-
-#include <iostream>
-
-#define Prime1 37
-#define Prime2 1048583
-#define BlockSize 100
-
-
-// Fast arithmetic, relying on powers of 2,
-// and on pre-processor concatenation property
-
-typedef struct{
- char* key;
- char* next; // secret from user
-}entry;
-
-typedef unsigned int address;
-
-typedef enum {HT_FIND, //!< search: find an entry
- HT_ENTER, //!< search: enter an entry
- HT_INIT, //!< scan: start scan
- HT_CONT //!< scan: continue scan
-} HT_ACTION;
-
-typedef enum {
- STR, //!< string
- STRPTR, //!< pointer to string
- INT, //!< pointer to int
- INTPTR, //!< pointer to pointer to int
-}HTYPE;
-
-//! Hash Table for strings
-
-class htable {
- int size; //!< table size
- int keylen; //!< key length
- HTYPE htype; //!< type of entry pointer
- entry **table; //!< hash table
- int scan_i; //!< scan support
- entry *scan_p; //!< scan support
- // statistics
- long keys; //!< # of entries
- long accesses; //!< # of accesses
- long collisions; //!< # of collisions
-
- mempool *memory; //!< memory pool
-
- size_t (*keylenfunc)(const char*); //!< function computing key length
- address (*hashfunc)(const char*); //!< hash function
- int (*compfunc)(const char*, const char*); //!< comparison function
-
- public:
-
- //! Creates a hash table
- htable(int n,int kl=0,HTYPE ht=STRPTR,size_t (*klf)(const char* )=NULL);
-
- //! Destroys a hash table
- ~htable();
-
- //! Computes the hash function
- address Hash(char *key){
- switch (htype){
- case INT:case INTPTR: return HashInt(key);
- break;
- case STR:case STRPTR: return HashStr(key);
- default: exit(1);
- }
- };
- address HashInt(char *key);
- address HashStr(char *key);
-
- //! Compares the keys of two entries
- int Comp(char *Key1,char *Key2){
- switch (htype){
- case INT:case INTPTR: return CompInt(Key1,Key2);
- break;
- case STR:case STRPTR: return CompStr(Key1,Key2);
- default: exit(1);
- };
- }
-
- int CompInt(char *Key1,char *Key2);
- int CompStr(char *Key1,char *Key2);
-
- //! Searches for an item
- char *search(char *item, HT_ACTION action);
-
- //! Scans the content
- char *scan(HT_ACTION action);
-
- //! Prints statistics
- void stat();
-
- //! Print a map of memory use
- void map(std::ostream& co=std::cout, int cols=80);
-
- //! Returns amount of used memory
- int used(){return
- size * sizeof(entry **) +
- memory->used();};
-};
-
-
-
-#endif
-
-
-
diff --git a/irstlm/src/index.h b/irstlm/src/index.h
deleted file mode 100644
index 500587989..000000000
--- a/irstlm/src/index.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-#pragma once
-
-#ifdef WIN32
-
-#include <string.h> //for strlen
-
-inline const char *index(const char *str, char search)
-{
- int i=0;
- while (i< strlen(str) ){
- if (str[i]==search) return &str[i];
- i++; //advance, otherwise the loop never terminates
- }
- return NULL;
-}
-
-
-#endif
-
-
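This stand-in mirrors the POSIX index() routine for the WIN32 build; on platforms that provide <cstring>, the standard strchr behaves the same way. A tiny, purely illustrative check:

#include <cassert>
#include <cstdio>
#include <cstring>

int main()
{
    const char *s = "backoff";
    // strchr is the portable equivalent of POSIX index()
    assert(strchr(s, 'k') == s + 3);
    assert(strchr(s, 'z') == NULL);
    printf("index()/strchr agree\n");
    return 0;
}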
diff --git a/irstlm/src/lmtable.cpp b/irstlm/src/lmtable.cpp
deleted file mode 100644
index 8a462f0d5..000000000
--- a/irstlm/src/lmtable.cpp
+++ /dev/null
@@ -1,1198 +0,0 @@
-/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-#include <iostream>
-#include <fstream>
-#include <stdexcept>
-#include <cassert>
-#include "math.h"
-#include "mempool.h"
-#include "htable.h"
-#include "ngramcache.h"
-#include "dictionary.h"
-#include "n_gram.h"
-#include "lmtable.h"
-using namespace std;
-
-inline void error(const char* message){
- cerr << message << "\n";
- throw std::runtime_error(message);
-}
-
-
-//instantiate an empty lm table
-lmtable::lmtable(){
-
- configure(1,false);
-
- dict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
-
- memset(cursize, 0, sizeof(cursize));
- memset(tbltype, 0, sizeof(tbltype));
- memset(maxsize, 0, sizeof(maxsize));
- memset(info, 0, sizeof(info));
- memset(NumCenters, 0, sizeof(NumCenters));
-
- max_cache_lev=0;
- for (int i=0;i<=LMTMAXLEV+1;i++) lmtcache[i]=NULL;
- probcache=NULL;
- statecache=NULL;
-
-};
-
-
-//load a lmtable from a LM file
-
-void lmtable::load(fstream& inp){
-
- //give a look at the header to select loading method
- char header[1024];
-
- inp >> header; cerr << header << "\n";
-
- if (strncmp(header,"Qblmt",5)==0 || strncmp(header,"blmt",4)==0)
- loadbin(inp,header);
- else
- loadtxt(inp,header);
-
- dict->genoovcode();
-
- cerr << "OOV code is " << dict->oovcode() << "\n";
-
-}
-
-
-
-int parseWords(char *sentence, char **words, int max)
-{
- char *word;
- int i = 0;
-
- char *const wordSeparators = " \t\r\n";
-
- for (word = strtok(sentence, wordSeparators);
- i < max && word != 0;
- i++, word = strtok(0, wordSeparators))
- {
- words[i] = word;
- }
-
- if (i < max){words[i] = 0;}
-
- return i;
-}
-
-
-
-//Load a LM as a text file. LM could have been generated either with the
-//IRST LM toolkit or with the SRILM Toolkit. In the latter case we are not
-//sure that n-grams are lexically ordered (according to the 1-grams).
-//However, we make the following assumption:
-//"all successors of any prefix are sorted and written in contiguous lines!"
-//This method also loads files processed with the quantization
-//tool: qlm
-
-int parseline(fstream& inp, int Order,ngram& ng,float& prob,float& bow){
-
- char* words[1+ LMTMAXLEV + 1 + 1];
- int howmany;
- char line[1024];
-
- inp.getline(line,1024);
-
- howmany = parseWords(line, words, Order + 3);
- assert(howmany == (Order+ 1) || howmany == (Order + 2));
-
- //read words
- ng.size=0;
- for (int i=1;i<=Order;i++)
- ng.pushw(strcmp(words[i],"<unk>")?words[i]:ng.dict->OOV());
-
- //read logprob/code and logbow/code
- //do not wrap sscanf in assert(), or parsing vanishes when NDEBUG is defined
- if (!sscanf(words[0],"%f",&prob))
- error("parseline: cannot parse the log-probability field");
- if (howmany==(Order+2)){
- if (!sscanf(words[Order+1],"%f",&bow))
- error("parseline: cannot parse the back-off field");
- }
- else
- bow=0.0; //this is log10prob=0 for implicit backoff
-
- return 1;
-}
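For reference, an ARPA entry carries a log10 probability, the n words, and an optional log10 back-off weight, which is exactly what parseWords/parseline split apart. A self-contained sketch of the same strtok-based tokenization on one hard-coded 2-gram line (the sample line and buffer sizes are illustrative):

#include <cstdio>
#include <cstring>
#include <cstdlib>

int main()
{
    // a typical ARPA 2-gram entry: logprob, two words, optional back-off weight
    char line[] = "-1.2553\tof the\t-0.4771";
    char *words[8];
    int howmany = 0;

    for (char *w = strtok(line, " \t\r\n"); w && howmany < 8; w = strtok(NULL, " \t\r\n"))
        words[howmany++] = w;

    float prob = atof(words[0]);                       // log10 probability
    float bow  = (howmany == 4) ? atof(words[3]) : 0;  // back-off weight, 0 if implicit

    printf("prob=%g ngram=[%s %s] bow=%g\n", prob, words[1], words[2], bow);
    return 0;
}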
-
-
-void lmtable::loadcenters(fstream& inp,int Order){
- char line[11];
-
- //first read the codebook
- cerr << Order << " read code book ";
- inp >> NumCenters[Order];
- Pcenters[Order]=new float[NumCenters[Order]];
- Bcenters[Order]=(Order<maxlev?new float[NumCenters[Order]]:NULL);
-
- for (int c=0;c<NumCenters[Order];c++){
- inp >> Pcenters[Order][c];
- if (Order<maxlev) inp >> Bcenters[Order][c];
- };
- //empty the last line
- inp.getline((char*)line,10);
-
-}
-
-
-void lmtable::loadtxt(fstream& inp,const char* header){
-
-
- //open input stream and prepare an input string
- char line[1024];
-
- //prepare word dictionary
- //dict=(dictionary*) new dictionary(NULL,1000000,NULL,NULL);
- dict->incflag(1);
-
- //put here ngrams, log10 probabilities or their codes
- ngram ng(dict);
- float prob,bow;
-
- //check the header to decide if the LM is quantized or not
- isQtable=(strncmp(header,"qARPA",5)==0?true:false);
-
- //we will configure the table later, when we know the maxlev
- bool yetconfigured=false;
-
- cerr << "loadtxt()\n";
-
- // READ ARPA Header
- int Order,n;
-
- while (inp.getline(line,1024)){
-
- bool backslash = (line[0] == '\\');
-
- if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) {
- maxsize[Order] = n; maxlev=Order; //update maxlev
-
- }
-
- if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) {
-
- //at this point we are sure about the size of the LM
- if (!yetconfigured){
- configure(maxlev,isQtable);yetconfigured=true;
- //allocate space for loading the table of this level
- for (int i=1;i<=maxlev;i++)
- table[i]= new char[maxsize[i] * nodesize(tbltype[i])];
- }
-
- cerr << Order << "-grams: reading ";
-
- if (isQtable) loadcenters(inp,Order);
-
- //allocate support vector to manage badly ordered n-grams
- if (maxlev>1 && Order<maxlev) {
- startpos[Order]=new int[maxsize[Order]];
- for (int c=0;c<maxsize[Order];c++) startpos[Order][c]=-1;
- }
-
- //prepare to read the n-grams entries
- cerr << maxsize[Order] << " entries\n";
-
- //WE ASSUME A WELL STRUCTURED FILE!!!
-
- for (int c=0;c<maxsize[Order];c++){
-
- if (parseline(inp,Order,ng,prob,bow))
- add(ng,
- (int)(isQtable?prob:*((int *)&prob)),
- (int)(isQtable?bow:*((int *)&bow)));
- }
- // now we can fix table at level Order -1
- if (maxlev>1 && Order>1) checkbounds(Order-1);
- }
- }
-
- dict->incflag(0);
- cerr << "done\n";
-
-}
-
-
-
-//set all bounds of entries with no successors to the bound
-//of the previous entry.
-
-void lmtable::checkbounds(int level){
-
- char* tbl=table[level];
- char* succtbl=table[level+1];
-
- LMT_TYPE ndt=tbltype[level], succndt=tbltype[level+1];
- int ndsz=nodesize(ndt), succndsz=nodesize(succndt);
-
- //re-order table at level+1
- char* newtbl=new char[succndsz * cursize[level+1]];
- int start,end,newstart;
-
- //re-order table at
- newstart=0;
- for (int c=0;c<cursize[level];c++){
- start=startpos[level][c]; end=bound(tbl+c*ndsz,ndt);
- //if start==-1 there are no successors for this entry, and end==-2
- if (end==-2) end=start;
- assert(start<=end);
- assert(newstart+(end-start)<=cursize[level+1]);
- assert(end<=cursize[level+1]);
-
- if (start<end)
- memcpy((void*)(newtbl + newstart * succndsz),
- (void*)(succtbl + start * succndsz),
- (end-start) * succndsz);
-
- bound(tbl+c*ndsz,ndt,newstart+(end-start));
- newstart+=(end-start);
- }
- delete [] table[level+1];
- table[level+1]=newtbl;
- newtbl=NULL;
-}
-
-//Add method inserts n-grams in the table structure. It is ONLY used during
-//loading of LMs in text format. It searches for the prefix, then it adds the
-//suffix to the last level and updates the start-end positions.
-
-int lmtable::add(ngram& ng,int iprob,int ibow){
-
- char *found; LMT_TYPE ndt; int ndsz;
-
- if (ng.size>1){
-
- // find the prefix starting from the first level
- int start=0, end=cursize[1];
-
- for (int l=1;l<ng.size;l++){
-
- ndt=tbltype[l]; ndsz=nodesize(ndt);
-
- if (search(table[l] + (start * ndsz),ndt,l,(end-start),ndsz,
- ng.wordp(ng.size-l+1),LMT_FIND, &found)){
-
- //update start-end positions for next step
- if (l< (ng.size-1)){
- //set start position
- if (found==table[l]) start=0; //first pos in table
- else start=bound(found - ndsz,ndt); //end of previous entry
-
- //set end position
- end=bound(found,ndt);
- }
- }
- else{
- cerr << "warning: missing back-off for ngram " << ng << "\n";
- return 0;
- }
- }
-
- // update bookkeeping information about level ng.size-1.
- // if this is the first successor update start position
- int position=(found-table[ng.size-1])/ndsz;
- if (startpos[ng.size-1][position]==-1)
- startpos[ng.size-1][position]=cursize[ng.size];
-
- //always update ending position
- bound(found,ndt,cursize[ng.size]+1);
- //cout << "startpos: " << startpos[ng.size-1][position]
- //<< " endpos: " << bound(found,ndt) << "\n";
-
- }
-
- // just add at the end of table[ng.size]
-
- assert(cursize[ng.size]< maxsize[ng.size]); // is there enough space?
- ndt=tbltype[ng.size];ndsz=nodesize(ndt);
-
- found=table[ng.size] + (cursize[ng.size] * ndsz);
- word(found,*ng.wordp(1));
- prob(found,ndt,iprob);
- if (ng.size<maxlev){bow(found,ndt,ibow);bound(found,ndt,-2);}
-
- cursize[ng.size]++;
-
- return 1;
-
-}
-
-
-void *lmtable::search(char* tb,
- LMT_TYPE ndt,
- int lev,
- int n,
- int sz,
- int *ngp,
- LMT_ACTION action,
- char **found){
-
- if (lev==1) return *found=(*ngp <n ? tb + *ngp * sz:NULL);
-
- //prepare search pattern
- char w[LMTCODESIZE];putmem(w,ngp[0],0,LMTCODESIZE);
-
- int idx=0; // index returned by mybsearch
- *found=NULL; //initialize output variable
- switch(action){
- case LMT_FIND:
- if (!tb || !mybsearch(tb,n,sz,(unsigned char *)w,&idx)) return NULL;
- else
- return *found=tb + (idx * sz);
- default:
- error("lmtable::search: this option is available");
- };
-
- return NULL;
-}
-
-
-int lmtable::mybsearch(char *ar, int n, int size,
- unsigned char *key, int *idx)
-{
-
- totbsearch++;
-
- register int low, high;
- register unsigned char *p;
- register int result;
- register int i;
-
- /* return idx with the first
- position equal or greater than key */
-
- /* Warning("start bsearch \n"); */
-
- low = 0;high = n; *idx=0;
- while (low < high)
- {
- *idx = (low + high) / 2;
- p = (unsigned char *) (ar + (*idx * size));
-
- //comparison
- for (i=(LMTCODESIZE-1);i>=0;i--){
- result=key[i]-p[i];
- if (result) break;
- }
-
- if (result < 0)
- high = *idx;
- else if (result > 0)
- low = *idx + 1;
- else
- return 1;
- }
-
- *idx=low;
-
- return 0;
-
-}
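mybsearch returns 1 when the packed code is found and otherwise leaves in *idx the first position whose key is greater or equal, i.e. the insertion point. A small self-contained sketch of the same lower-bound search over a sorted array, here on plain ints (names and data are illustrative):

#include <cstdio>

// returns 1 if key is present (*idx at a matching position);
// otherwise *idx is the first position whose value is >= key
static int lower_bound_search(const int *ar, int n, int key, int *idx)
{
    int low = 0, high = n;
    while (low < high) {
        *idx = (low + high) / 2;
        if (key < ar[*idx])      high = *idx;
        else if (key > ar[*idx]) low = *idx + 1;
        else                     return 1;
    }
    *idx = low;
    return 0;
}

int main()
{
    const int ar[] = {2, 5, 5, 9, 12};
    int idx;
    printf("found=%d idx=%d\n", lower_bound_search(ar, 5, 9, &idx), idx);  // found=1 idx=3
    printf("found=%d idx=%d\n", lower_bound_search(ar, 5, 7, &idx), idx);  // found=0 idx=3
    return 0;
}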
-
-
-// saves a LM table in text format
-
-void lmtable::savetxt(const char *filename){
-
- fstream out(filename,ios::out);
- int l;
-
- out.precision(7);
-
- if (isQtable) out << "qARPA\n";
-
-
- ngram ng(dict,0);
-
- cerr << "savetxt: " << filename << "\n";
-
- out << "\n\\data\\\n";
- for (l=1;l<=maxlev;l++){
- out << "ngram " << l << "= " << cursize[l] << "\n";
- }
-
- for (l=1;l<=maxlev;l++){
-
- out << "\n\\" << l << "-grams:\n";
- cerr << "save: " << cursize[l] << " " << l << "-grams\n";
- if (isQtable){
- out << NumCenters[l] << "\n";
- for (int c=0;c<NumCenters[l];c++){
- out << Pcenters[l][c];
- if (l<maxlev) out << " " << Bcenters[l][c];
- out << "\n";
- }
- }
-
- ng.size=0;
- dumplm(out,ng,1,l,0,cursize[1]);
-
- }
-
- out << "\\end\\\n";
- cerr << "done\n";
-}
-
-
-void lmtable::savebin(const char *filename){
-
- fstream out(filename,ios::out);
- cerr << "savebin: " << filename << "\n";
-
- // print header
- if (isQtable){
- out << "Qblmt " << maxlev;
- for (int i=1;i<=maxlev;i++) out << " " << cursize[i];
- out << "\nNumCenters";
- for (int i=1;i<=maxlev;i++) out << " " << NumCenters[i];
- out << "\n";
-
- }else{
- out << "blmt " << maxlev;
- for (int i=1;i<=maxlev;i++) out << " " << cursize[i] ;
- out << "\n";
- }
-
- dict->save(out);
-
- for (int i=1;i<=maxlev;i++){
- cerr << "saving " << cursize[i] << " " << i << "-grams\n";
- if (isQtable){
- out.write((char*)Pcenters[i],NumCenters[i] * sizeof(float));
- if (i<maxlev)
- out.write((char *)Bcenters[i],NumCenters[i] * sizeof(float));
- }
- out.write(table[i],cursize[i]*nodesize(tbltype[i]));
- }
-
- cerr << "done\n";
-}
-
-
-//manages the long header of a bin file
-
-void lmtable::loadbinheader(fstream& inp,const char* header){
-
- // read rest of header
- inp >> maxlev;
-
- if (strncmp(header,"Qblmt",5)==0) isQtable=1;
- else if(strncmp(header,"blmt",4)==0) isQtable=0;
- else error("loadbin: wrong header");
-
- configure(maxlev,isQtable);
-
- for (int i=1;i<=maxlev;i++){
- inp >> cursize[i]; maxsize[i]=cursize[i];
- table[i]=new char[cursize[i] * nodesize(tbltype[i])];
- }
-
-
- if (isQtable){
- char header2[100];
- cerr << "reading num centers:";
- inp >> header2;
- for (int i=1;i<=maxlev;i++){
- inp >> NumCenters[i];cerr << " " << NumCenters[i];
-
- }
- cerr << "\n";
- }
-}
-
-//load codebook of level l
-
-void lmtable::loadbincodebook(fstream& inp,int l){
-
- Pcenters[l]=new float [NumCenters[l]];
- inp.read((char*)Pcenters[l],NumCenters[l] * sizeof(float));
- if (l<maxlev){
- Bcenters[l]=new float [NumCenters[l]];
- inp.read((char *)Bcenters[l],NumCenters[l]*sizeof(float));
- }
-
-}
-
-//load a binary lmfile
-
-void lmtable::loadbin(fstream& inp, const char* header){
-
- cerr << "loadbin()\n";
-
- loadbinheader(inp,header);
-
- dict->load(inp);
-
- for (int l=1;l<=maxlev;l++){
- if (isQtable) loadbincodebook(inp,l);
-
- cerr << "loading " << cursize[l] << " " << l << "-grams\n";
- inp.read(table[l],cursize[l]*nodesize(tbltype[l]));
- };
-
- cerr << "done\n";
-}
-
-
-
-int lmtable::get(ngram& ng,int n,int lev){
-
- // cout << "cerco:" << ng << "\n";
- totget[lev]++;
-
- if (lev > maxlev) error("get: lev exceeds maxlevel");
- if (n < lev) error("get: ngram is too small");
-
- //set boundaries for 1-gram
- int offset=0,limit=cursize[1];
-
- //information of table entries
- int hit;char* found; LMT_TYPE ndt;
- ng.link=NULL;
- ng.lev=0;
-
- for (int l=1;l<=lev;l++){
-
- //initialize entry information
- hit = 0 ; found = NULL; ndt=tbltype[l];
-
- //if (l==2) cout <<"bicache: searching:" << ng <<"\n";
-
- if (lmtcache[l] && lmtcache[l]->get(ng.wordp(n),(char *)&found))
- hit=1;
- else
- search(table[l] + (offset * nodesize(ndt)),
- ndt,
- l,
- (limit-offset),
- nodesize(ndt),
- ng.wordp(n-l+1),
- LMT_FIND,
- &found);
-
- //insert both found and not found items!!!
- if (lmtcache[l] && hit==0)
- lmtcache[l]->add(ng.wordp(n),(char *)&found);
-
- if (!found) return 0;
-
- ng.link=found;
- ng.info=ndt;
- ng.lev=l;
-
- if (l<maxlev){ //set start/end point for next search
-
- //if the current offset is at the bottom, so is that of the successors
- if (offset+1==cursize[l]) limit=cursize[l+1];
- else limit=bound(found,ndt);
-
- //if the current start is at the beginning, so is that of the successors
- if (found==table[l]) offset=0;
- else offset=bound((found - nodesize(ndt)),ndt);
-
- assert(offset!=-1); assert(limit!=-1);
- }
- }
-
- //put information inside ng
- ng.size=n; ng.freq=0;
- ng.succ=(lev<maxlev?limit-offset:0);
-
- return 1;
-}
-
-
-//recursively prints the language model table
-
-void lmtable::dumplm(fstream& out,ngram ng, int ilev, int elev, int ipos,int epos){
-
- LMT_TYPE ndt=tbltype[ilev];
- int ndsz=nodesize(ndt);
-
-
- assert(ng.size==ilev-1);
- assert(ipos>=0 && epos<=cursize[ilev] && ipos<epos);
- ng.pushc(0);
-
- for (int i=ipos;i<epos;i++){
- *ng.wordp(1)=word(table[ilev]+i*ndsz);
- if (ilev<elev){
- //get first and last successor position
- int isucc=(i>0?bound(table[ilev]+(i-1)*ndsz,ndt):0);
- int esucc=bound(table[ilev]+i*ndsz,ndt);
- if (isucc < esucc) //there are successors!
- dumplm(out,ng,ilev+1,elev,isucc,esucc);
- //else
- //cout << "no successors for " << ng << "\n";
- }
- else{
- //out << i << " "; //this was just to count printed n-grams
- int ipr=prob(table[ilev]+ i * ndsz,ndt);
- out << (isQtable?ipr:*(float *)&ipr) <<"\t";
- for (int k=ng.size;k>=1;k--){
- if (k<ng.size) out << " ";
- out << dict->decode(*ng.wordp(k));
- }
-
- if (ilev<maxlev){
- int ibo=bow(table[ilev]+ i * ndsz,ndt);
- if (isQtable) out << "\t" << ibo;
- else
- if (*((float *)&ibo)!=0.0)
- out << "\t" << *((float *)&ibo);
-
- }
- out << "\n";
- }
- }
-}
-
-//succscan iteratively returns all successors of an ngram h for which
-//get(h,h.size,h.size) returned true.
-
-
-int lmtable::succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
- assert(lev==h.lev+1 && h.size==lev && lev<=maxlev);
-
- LMT_TYPE ndt=tbltype[h.lev];
- int ndsz=nodesize(ndt);
-
- switch (action){
-
- case LMT_INIT:
- //reset ngram local indexes
-
- ng.size=lev;
- ng.trans(h);
- ng.midx[lev]=(h.link>table[h.lev]?bound(h.link-ndsz,ndt):0);
-
- return 1;
-
- case LMT_CONT:
-
- if (ng.midx[lev]<bound(h.link,ndt))
- {
- //put current word into ng
- *ng.wordp(1)=word(table[lev]+ng.midx[lev]*nodesize(tbltype[lev]));
- ng.midx[lev]++;
- return 1;
- }
- else
- return 0;
-
- default:
- cerr << "succscan: only permitted options are LMT_INIT and LMT_CONT\n";
- exit(0);
- }
-
-}
-
-//maxsuffptr returns the largest suffix of an n-gram that is contained
-//in the LM table. This can be used as a compact representation of the
-//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it
-//is trimmed to its n-1 suffix.
-
-const char *lmtable::maxsuffptr(ngram ong){
-
- if (ong.size==0) return (char*) NULL;
- if (ong.size>=maxlev) ong.size=maxlev-1;
-
- ngram ng=ong;
- //ngram ng(dict); //eventually use the <unk> word
- //ng.trans(ong);
-
- if (get(ng,ng.size,ng.size))
- return ng.link;
- else{
- ong.size--;
- return maxsuffptr(ong);
- }
-}
-
-
-const char *lmtable::cmaxsuffptr(ngram ong){
-
- if (ong.size==0) return (char*) NULL;
- if (ong.size>=maxlev) ong.size=maxlev-1;
-
- char* found;
-
- if (statecache && (ong.size==maxlev-1) && statecache->get(ong.wordp(maxlev-1),(char *)&found))
- return found;
-
- found=(char *)maxsuffptr(ong);
-
- if (statecache && ong.size==maxlev-1){
- //if (statecache->isfull()) statecache->reset();
- statecache->add(ong.wordp(maxlev-1),(char *)&found);
- };
-
- return found;
-
-}
-
-
-// returns the probability of an n-gram
-
-double lmtable::prob(ngram ong){
-
- if (ong.size==0) return 1.0;
- if (ong.size>maxlev) ong.size=maxlev;
-
- ngram ng(dict);
- ng.trans(ong);
-
- double rbow;
- int ibow,iprob;
- LMT_TYPE ndt;
-
-
- if (get(ng,ng.size,ng.size)){
- ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt);
- return exp((double)(isQtable?Pcenters[ng.size][iprob]:*((float *)&iprob)));
- }
- else{ //size==1 means an OOV word
- if (ng.size==1)
- return 1.0/UNIGRAM_RESOLUTION;
- else{ // compute backoff
- //set backoff state, shift n-gram, set default bow prob
- bo_state(1); ng.shift();rbow=1.0;
- if (ng.lev==ng.size){
- ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt);
- rbow= (double) (isQtable?Bcenters[ng.size][ibow]:*((float *)&ibow));
- }
- //prepare recursion step
- ong.size--;
- return exp(rbow) * prob(ong);
- }
- }
-}
-
-//return log10 probs
-
-double lmtable::lprob(ngram ong){
-
- if (ong.size==0) return 0.0;
- if (ong.size>maxlev) ong.size=maxlev;
-
- ngram ng=ong;
- //ngram ng(dict); //avoid dictionary transfer
- //ng.trans(ong);
-
- double rbow;
- int ibow,iprob;
- LMT_TYPE ndt;
-
-
- if (get(ng,ng.size,ng.size)){
- ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt);
- return (double)(isQtable?Pcenters[ng.size][iprob]:*((float *)&iprob));
- }
- else{ //size==1 means an OOV word
- if (ng.size==1)
- return -log(UNIGRAM_RESOLUTION)/log(10.0);
- else{ // compute backoff
- //set backoff state, shift n-gram, set default bow prob
- bo_state(1); ng.shift();rbow=0.0;
- if (ng.lev==ng.size){
- ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt);
- rbow= (double) (isQtable?Bcenters[ng.size][ibow]:*((float *)&ibow));
- }
- //prepare recursion step
- ong.size--;
- return rbow + lprob(ong);
- }
- }
-}
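Both prob() and lprob() implement the usual back-off recursion: if the full n-gram is in the table its (possibly quantized) score is returned; otherwise the back-off weight of the history, when present, is combined with the score of the n-gram shortened by its oldest word. A toy sketch of that recursion over a map-based model rather than the lmtable layout (data structures and scores are illustrative):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

typedef std::vector<std::string> NGram;

static std::map<std::string, double> lp;   // "of the" -> log10 probability
static std::map<std::string, double> bo;   // "of"     -> back-off weight

static std::string join(const NGram &ng) {
    std::string s;
    for (size_t i = 0; i < ng.size(); i++) s += (i ? " " : "") + ng[i];
    return s;
}

// log10 P(last word | preceding words), backing off to shorter histories
static double lprob(NGram ng) {
    std::map<std::string, double>::iterator it = lp.find(join(ng));
    if (it != lp.end()) return it->second;
    if (ng.size() == 1) return -7.0;                 // unknown-unigram floor
    NGram hist(ng.begin(), ng.end() - 1);            // history = all but the last word
    double w = bo.count(join(hist)) ? bo[join(hist)] : 0.0;
    ng.erase(ng.begin());                            // drop the oldest word and recurse
    return w + lprob(ng);
}

int main() {
    lp["the"] = -1.1; lp["cat"] = -2.3; lp["the cat"] = -1.8;
    bo["of"] = -0.4;
    NGram q; q.push_back("of"); q.push_back("cat");
    printf("log10 P(cat|of) = %g\n", lprob(q));      // -0.4 + (-2.3) = -2.7
    return 0;
}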
-
-//return log10 probs; use cache memory
-
-double lmtable::clprob(ngram ong){
-
- if (ong.size==0) return 0.0;
-
- if (ong.size>maxlev) ong.size=maxlev;
-
- double logpr;
-
-#ifdef TRACE_CACHE
- if (probcache && ong.size==maxlev && sentence_id>0){
- *cacheout << sentence_id << " " << ong << "\n";
- }
-#endif
-
- if (probcache && ong.size==maxlev && probcache->get(ong.wordp(maxlev),(char *)&logpr)){
- return logpr;
- }
-
- logpr=lprob(ong);
-
- if (probcache && ong.size==maxlev){
- //if (probcache->isfull()) probcache->reset();
- probcache->add(ong.wordp(maxlev),(char *)&logpr);
- };
-
- return logpr;
-
-};
-
-
-
-//Fill the lmtable with the n-grams in a huge lmfile.
-//Use the local dictionary to select the needed ngrams
-
-
-void lmtable::filter2(const char* binlmfile, int buffMb){
-
- //load header and dictionary of binary lm on disk
- lmtable* dsklmt=new lmtable();
- fstream inp(binlmfile,ios::in);
-
- // read header
- char header[1024];
- inp >> header;
-
- dsklmt->loadbinheader(inp, header);
- dsklmt->dict->load(inp);
-
- //inherit properties of the dsklmt
- configure(dsklmt->maxlevel(),dsklmt->isQuantized());
-
- //prepare word code conversion table; words which
- //are not in the local dictionary will have code -1
- //prepare a new dictionary sorted as the large dictionary
-
- dictionary* newdict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
- newdict->incflag(1);
- int* code2code=new int[dsklmt->dict->size()];
- for (int w=0;w<dsklmt->dict->size();w++){
- if (dict->getcode(dsklmt->dict->decode(w))!=-1)
- code2code[w]=newdict->encode(dsklmt->dict->decode(w));
- else code2code[w]=-1;
- }
- newdict->incflag(0);
- delete dict;
- dict=newdict;
-
- //service variables
- char* p; char* q; char* r;
- int ndsz; LMT_TYPE type; int w;
- long i,j,l;
-
-
- disktable* dtbl;
- for (l=1;l<=maxlev;l++){
-
- //shortcuts for current table
- type=tbltype[l]; ndsz=nodesize(type);
-
- //take over any codebooks from dsklmt
- if (isQtable) loadbincodebook(inp,l);
-
- //allocate the maximum number of entries to be loaded at a time
- cerr << "loading part of " << dsklmt->cursize[l] << " " << l << "-grams\n";
-
-
- dtbl=new disktable(inp, (buffMb * 1024 *1024)/ndsz,ndsz,dsklmt->maxsize[l]);
-
- if (l==1){
-
-
- //compute actual table size
- maxsize[l]=0;
- for (i=0;i<dsklmt->maxsize[l];i++){
- p=dtbl->get(inp,i);
- if ((code2code[dsklmt->word(p)]) != -1) maxsize[l]++;
- }
-
- assert(maxsize[l]<=dsklmt->maxsize[l]);
-
- //allocate memory for table and start positions
- table[l]=new char[maxsize[l] * ndsz];
- startpos[l]=new int[maxsize[l]];
-
- //reset position of dsklmt
- dtbl->rewind(inp);
-
- //copy elements into table[l]
- cursize[l]=0;
- for (i=0;i<dsklmt->maxsize[l];i++){
- p=dtbl->get(inp,i);
- if ((w=code2code[dsklmt->word(p)]) != -1) {
- r=table[l] + cursize[l] * ndsz;
- memcpy(r,p,ndsz);
- //store the initial position in startpos
- startpos[l][cursize[l]]=(i==0?0:bound(dtbl->get(inp,i-1),tbltype[l]));
- word(r,w);
- //cout << "1-gram bound:" << bound(r,tbltype[l]) << "\n";
- cursize[l]++;
- }
- }
-
- for (i=0;i<cursize[l];i++) assert(word(table[l]+i*ndsz)==i);
-
- assert(maxsize[l]==cursize[l]);
-
- }
- else{
-
- //shortcuts for the predecessors table
- char* ptable=table[l-1];
- LMT_TYPE ptype=tbltype[l-1];
- int pndsz=nodesize(ptype);
-
-
-
- //count actual table size, allocate memory, and copy elements
- //we scan elements through the previous table: ptable
- //cout << inp.tellp() << "\n";
-
- maxsize[l]=0;
-
- for (i=0;i<cursize[l-1];i++){
- p=ptable+i*pndsz;
- for (j=startpos[l-1][i];j<bound(p,ptype);j++){
- assert(startpos[l-1][i]<bound(p,ptype));
- q=dtbl->get(inp,j);
- if ((w=code2code[dsklmt->word(q)]) != -1) maxsize[l]++;
-
- }
- }
-
- //allocate memory for the table, and fill it
- assert(maxsize[l]<=dsklmt->maxsize[l]);
-
- table[l]=new char[maxsize[l] * ndsz];
- if (l<maxlev) startpos[l]=new int[maxsize[l]];
-
- //reset position of dsklmt
- dtbl->rewind(inp);
-
- r=table[l]; //next available position in table[l]
- cursize[l]=0;
-
- for (i=0;i<cursize[l-1];i++){
- p=ptable+i*pndsz;
- for (j=startpos[l-1][i];j<bound(p,ptype);j++){
- assert(startpos[l-1][i]<bound(p,ptype));
-
- q=dtbl->get(inp,j);
-
-
- if ((w=code2code[dsklmt->word(q)]) != -1){
- //copy element
- r=table[l] + cursize[l] * ndsz;
- memcpy(r,q,ndsz);
- if (l<maxlev) startpos[l][cursize[l]]=(j>0?bound(dtbl->get(inp,j-1),type):0);
- word(r,w);
- //cout << "+" << dict->decode(word(q)) << " - bound "
- //<< startpos[l][cursize[l]] << " " << bound(p,ptype) << "\n";
- cursize[l]++; //increment index in startpos
- }
-
- }
- //update bounds of predecessor
- bound(p,ptype,cursize[l]);
- }
-
- assert(cursize[l]==maxsize[l]);
- }
-
-
- delete dtbl;
- if (l>1) delete [] startpos[l-1];
- }
-
-
-}
-
-void lmtable::filter(const char* binlmfile){
-
- //load header and dictionary of binary lm on disk
- lmtable* dsklmt=new lmtable();
- fstream inp(binlmfile,ios::in);
-
- // read header
- char header[1024];
- inp >> header;
-
- dsklmt->loadbinheader(inp, header);
-
- dsklmt->dict->load(inp);
-
- //inherit properties of the dsklmt
- configure(dsklmt->maxlevel(),dsklmt->isQuantized());
-
- //prepare word code conversion table; words which
- //are not in the local dictionary will have code -1
- //prepare a new dictionary sorted as the large dictionary
-
- dictionary* newdict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
- newdict->incflag(1);
- int* code2code=new int[dsklmt->dict->size()];
- for (int w=0;w<dsklmt->dict->size();w++){
- if (dict->getcode(dsklmt->dict->decode(w))!=-1)
- code2code[w]=newdict->encode(dsklmt->dict->decode(w));
- else code2code[w]=-1;
- }
- newdict->incflag(0);
- delete dict;
- dict=newdict;
-
- //service variables
- char* p; char* q; char* r;
- int ndsz; LMT_TYPE type; int w;
- int i,j,l;
-
- for (l=1;l<=maxlev;l++){
-
- //shortcuts for current table
- type=tbltype[l]; ndsz=nodesize(type);
-
- //take over any codebooks from dsklmt
- if (isQtable) loadbincodebook(inp,l);
-
- //load the single level-l table from dsklmt: this table can be
- //removed at the end of this cycle
- cerr << "loading " << dsklmt->cursize[l] << " " << l << "-grams\n";
- dsklmt->table[l]=new char[dsklmt->cursize[l]*ndsz];
- inp.read(dsklmt->table[l],dsklmt->cursize[l]*ndsz);
-
- //shortcuts for dsktable
- char* dtbl=dsklmt->table[l];
- int dsize=dsklmt->cursize[l];
-
- if (l==1){
-
- //count actual table size
- maxsize[l]=0;
- for (i=0;i<dsize;i++)
- if ((code2code[dsklmt->word(dtbl+i*ndsz)]) != -1) maxsize[l]++;
-
- assert(maxsize[l]<=dsklmt->maxsize[l]);
- //allocate memory for table and start positions
- table[l]=new char[maxsize[l] * ndsz];
- startpos[l]=new int[maxsize[l]];
-
- //copy elements one by one
-
- for (i=0;i<dsize;i++){
- p=dtbl+i*ndsz;
- if ((w=code2code[dsklmt->word(p)]) != -1) {
- r=table[l] + cursize[l] * ndsz;
- memcpy(r,p,ndsz);
- //store the initial position in startpos
- startpos[l][cursize[l]]=(i==0?0:bound(p-ndsz,tbltype[l]));
- word(r,w);
- cursize[l]++;
- }
- }
-
- for (i=0;i<cursize[l];i++) assert(word(table[l]+i*ndsz)==i);
-
- assert(maxsize[l]==cursize[l]);
-
- }
- else{ //l>1
-
- //shortcuts for the predecessors table
- char* ptable=table[l-1];
- LMT_TYPE ptype=tbltype[l-1];
- int pndsz=nodesize(ptype);
-
- //count actual table size, allocate memory, and copy elements
- //we scan elements through the previous table: ptable
-
- maxsize[l]=0;
- for (i=0;i<cursize[l-1];i++){
- p=ptable+i*pndsz;
- for (j=startpos[l-1][i];j<bound(p,ptype);j++){
- q=dsklmt->table[l] + j * ndsz;
- if ((w=code2code[dsklmt->word(q)]) != -1){
- maxsize[l]++;
- }
- }
- }
-
- //allocate memory for the table, and fill it
- assert(maxsize[l]<=dsklmt->maxsize[l]);
- table[l]=new char[maxsize[l] * ndsz];
- if (l<maxlev) startpos[l]=new int[maxsize[l]];
-
- r=table[l]; //next available position in table[l]
- cursize[l]=0;
- for (i=0;i<cursize[l-1];i++){
- p=ptable+i*pndsz;
- for (j=startpos[l-1][i];j<bound(p,ptype);j++){
- q=dsklmt->table[l] + j * ndsz;
- if ((w=code2code[dsklmt->word(q)]) != -1){
- //copy element
- r=table[l] + cursize[l] * ndsz;
- memcpy(r,q,ndsz);
- if (l<maxlev)
- startpos[l][cursize[l]]=(j==0?0:dsklmt->bound(q-ndsz,type));
- word(r,w);
- //cout << "+" << dict->decode(word(q)) << " - bound "
- //<< startpos[l][cursize[l]] << " " << bound(p,ptype) << "\n";
- cursize[l]++; //increment index in startpos
- }
- }
- //update bounds of predecessor
- bound(p,ptype,cursize[l]);
- }
-
- }
-
- delete [] dsklmt->table[l];
- if (l>1) delete [] startpos[l-1];
- }
-
-}
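Both filter() and filter2() hinge on the code2code array: each word code of the large on-disk dictionary maps either to its code in the new, smaller dictionary or to -1 when the word is outside the filter vocabulary. A stripped-down sketch of that mapping with plain standard containers instead of the dictionary class (names and data are illustrative):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main()
{
    // word list of the large on-disk dictionary, in its own order
    std::vector<std::string> dskwords;
    dskwords.push_back("the"); dskwords.push_back("cat");
    dskwords.push_back("sat"); dskwords.push_back("zebra");

    // filter vocabulary (the local dictionary)
    std::map<std::string, int> local;
    local["the"] = 0; local["sat"] = 1;

    // new dictionary keeps the order of the large one, restricted to the filter vocabulary
    std::map<std::string, int> newdict;
    std::vector<int> code2code(dskwords.size(), -1);
    for (size_t w = 0; w < dskwords.size(); w++)
        if (local.count(dskwords[w])) {
            int code = (int)newdict.size();
            newdict[dskwords[w]] = code;
            code2code[w] = code;
        }

    for (size_t w = 0; w < dskwords.size(); w++)
        printf("%s -> %d\n", dskwords[w].c_str(), code2code[w]);  // the->0 cat->-1 sat->1 zebra->-1
    return 0;
}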
-
-
-void lmtable::stat(int level){
- int totmem=0,memory;
- float mega=1024 * 1024;
-
- cout.precision(2);
-
- cout << "lmtable class statistics\n";
-
- cout << "levels " << maxlev << "\n";
- for (int l=1;l<=maxlev;l++){
- memory=cursize[l] * nodesize(tbltype[l]);
- cout << "lev " << l
- << " entries "<< cursize[l]
- << " used mem " << memory/mega << "Mb\n";
- totmem+=memory;
- }
-
- cout << "total allocated mem " << totmem/mega << "Mb\n";
-
- cout << "total number of get calls\n";
- for (int l=1;l<=maxlev;l++){
- cout << "level " << l << " " << totget[l] << "\n";
- }
- cout << "total binary search : " << totbsearch << "\n";
-
- if (level >1 ) dict->stat();
-
-}
diff --git a/irstlm/src/lmtable.h b/irstlm/src/lmtable.h
deleted file mode 100644
index 7960c766c..000000000
--- a/irstlm/src/lmtable.h
+++ /dev/null
@@ -1,365 +0,0 @@
-/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-
-#ifndef MF_LMTABLE_H
-#define MF_LMTABLE_H
-
-#include "ngramcache.h"
-#include "dictionary.h"
-#include "n_gram.h"
-
-#undef TRACE_CACHE
-
-#define LMTMAXLEV 11
-
-#ifndef LMTCODESIZE
-#define LMTCODESIZE (int)3
-#endif
-
-#define SHORTSIZE (int)2
-#define PTRSIZE (int)sizeof(char *)
-#define INTSIZE (int)4
-#define CHARSIZE (int)1
-
-#define PROBSIZE (int)4 //use float
-#define QPROBSIZE (int)1
-#define BOUNDSIZE (int)4
-
-#define UNIGRAM_RESOLUTION 10000000.0
-
-typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE;
-typedef char* node;
-
-typedef enum {LMT_FIND, //!< search: find an entry
- LMT_ENTER, //!< search: enter an entry
- LMT_INIT, //!< scan: start scan
- LMT_CONT //!< scan: continue scan
-} LMT_ACTION;
-
-
-//disktable for accessing tables stored on disk
-
-class disktable{
-private:
- char* buffer; //!< buffer of disk table
- int buffer_size; //!< size of buffer
- int entry_size; //!< size of each single entry in buffer
- long current_border; //!< current last available entry in buffer
- long file_border; //!< last element in disk table
- long start_position; //!< file offset where the table starts
-public:
-
- disktable(std::fstream& inp, int buffersize, int entrysize,long max_entries){
- buffer_size=buffersize;
- entry_size=entrysize;
- buffer=new char[(buffer_size+1) * entry_size];
- current_border=0;
- file_border=max_entries;
- start_position=inp.tellp();
- };
-
- ~disktable() {delete [] buffer;};
-
- char* get(std::fstream& inp,int position){
- assert(position < file_border);
-
- //you can look back at most one position before the first entry in the buffer!
- assert(position >= (current_border-buffer_size -1));
-
- while (position>=current_border){
- int how_many=(current_border + buffer_size <= file_border?buffer_size:file_border-current_border);
- //store last value in position buffer_size;
- memcpy(buffer + buffer_size * entry_size, buffer+(buffer_size-1) * entry_size, entry_size);
- //read the next how_many elements
- inp.read(buffer,how_many * entry_size);
- //update current border
- current_border+=how_many;
- }
- if (position > (current_border-buffer_size-1) ) //then it is in buffer
- return buffer + (position % buffer_size) * entry_size;
- else
- if (current_border>buffer_size) //asks for last of previous block
- return buffer + buffer_size * entry_size;
- else return NULL;
- };
-
- void rewind(std::fstream& inp){
- inp.seekp(start_position);
- current_border=0;
- }
-
-};
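disktable serves get(position) from a small in-memory window over the on-disk level and refills the window on demand; callers are expected to move forward, looking back at most one entry before the first one still buffered (the extra slot at the end of the buffer holds exactly that entry). A self-contained sketch of the same windowed-read idea over an in-memory "file" (types, sizes and data are illustrative, not the disktable API):

#include <cstdio>
#include <vector>

// windowed reader: refills a small buffer on demand and keeps the last entry
// of the previous window for a one-step look-back
struct window_reader {
    const std::vector<int> &file;
    std::vector<int> buf;
    int window, border;                  // border = first position not yet loaded
    window_reader(const std::vector<int> &f, int w)
        : file(f), buf(w + 1), window(w), border(0) {}
    int get(int pos) {
        while (pos >= border) {
            buf[window] = buf[window - 1];            // save last entry of the old window
            int n = (border + window <= (int)file.size())
                      ? window : (int)file.size() - border;
            for (int i = 0; i < n; i++) buf[i] = file[border + i];
            border += n;
        }
        if (pos > border - window - 1) return buf[pos % window];
        return buf[window];                           // one-step look-back
    }
};

int main() {
    std::vector<int> file;
    for (int i = 0; i < 12; i++) file.push_back(i * i);
    window_reader r(file, 4);
    for (int i = 0; i < 12; i++)
        printf("%d ", r.get(i));                      // 0 1 4 9 16 ... 121
    printf("| look-back get(7): %d\n", r.get(7));     // still valid: 49
    return 0;
}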
-
-class lmtable{
-
- char* table[LMTMAXLEV+1]; //storage of all levels
- LMT_TYPE tbltype[LMTMAXLEV+1]; //table type of each level
- int cursize[LMTMAXLEV+1]; //current size of each level
- int maxsize[LMTMAXLEV+1]; //maximum size of each level
- int* startpos[LMTMAXLEV+1]; //support vector to store start positions
-
- int maxlev; //max level of table
- char info[100]; //information put in the header
-
- //statistics
- int totget[LMTMAXLEV+1];
- int totbsearch;
-
- //probability quantization
- bool isQtable;
-
- int NumCenters[LMTMAXLEV+1];
- float* Pcenters[LMTMAXLEV+1];
- float* Bcenters[LMTMAXLEV+1];
-
- int lmt_oov_code;
- int lmt_oov_size;
- int backoff_state;
-
- //improve access speed
- ngramcache* lmtcache[LMTMAXLEV+1];
- ngramcache* probcache;
- ngramcache* statecache;
- int max_cache_lev;
-
-public:
-
-#ifdef TRACE_CACHE
- std::fstream* cacheout;
- int sentence_id;
-#endif
-
- dictionary *dict; // dictionary
-
- lmtable();
-
- ~lmtable(){
- for (int i=2;i<=LMTMAXLEV;i++)
- if (lmtcache[i]){
- std::cerr << i <<"-gram cache: "; lmtcache[i]->stat();
- delete lmtcache[i];
- }
-
- if (probcache){
- std::cerr << "Prob Cache: "; probcache->stat();
- delete probcache;
-#ifdef TRACE_CACHE
- cacheout->close();
- delete cacheout;
-#endif
-
- }
- if (statecache){
- std::cerr << "State Cache: "; statecache->stat();
- delete statecache;
- }
-
-
- for (int i=1;i<=maxlev;i++){
- if (table[i]) delete [] table[i];
- if (isQtable){
- if (Pcenters[i]) delete [] Pcenters[i];
- if (i<maxlev)
- if (Bcenters[i]) delete [] Bcenters[i];
- }
- }
- }
-
- void init_probcache(){
- assert(probcache==NULL);
- probcache=new ngramcache(maxlev,sizeof(double),200000);
-#ifdef TRACE_CACHE
- cacheout=new std::fstream("/tmp/tracecache",std::ios::out);
- sentence_id=0;
-#endif
- }
-
- void init_statecache(){
- assert(statecache==NULL);
- statecache=new ngramcache(maxlev-1,sizeof(char *),100000);
- }
-
- void init_lmtcaches(int uptolev){
- max_cache_lev=uptolev;
- for (int i=2;i<=max_cache_lev;i++){
- assert(lmtcache[i]==NULL);
- lmtcache[i]=new ngramcache(i,sizeof(char *),200000);
- }
- }
-
- void check_cache_levels(){
- if (probcache && probcache->isfull()) probcache->reset();
- if (statecache && statecache->isfull()) statecache->reset();
- for (int i=2;i<=max_cache_lev;i++)
- if (lmtcache[i]->isfull()) lmtcache[i]->reset();
- }
-
- bool is_probcache_active(){return probcache!=NULL;}
- bool is_statecache_active(){return statecache!=NULL;}
- bool are_lmtcaches_active(){return lmtcache[2]!=NULL;}
-
- void configure(int n,bool quantized){
- maxlev=n;
- if (n==1)
- tbltype[1]=(quantized?QLEAF:LEAF);
- else{
- for (int i=1;i<n;i++) tbltype[i]=(quantized?QINTERNAL:INTERNAL);
- tbltype[n]=(quantized?QLEAF:LEAF);
- }
- };
-
- int maxlevel(){return maxlev;};
- bool isQuantized(){return isQtable;}
-
-
- void savetxt(const char *filename);
- void savebin(const char *filename);
- void dumplm(std::fstream& out,ngram ng, int ilev, int elev, int ipos,int epos);
-
- void load(std::fstream& inp);
- void loadtxt(std::fstream& inp,const char* header);
- void loadbin(std::fstream& inp,const char* header);
-
- void loadbinheader(std::fstream& inp, const char* header);
- void loadbincodebook(std::fstream& inp,int l);
-
- void filter(const char* lmfile);
- void filter2(const char* lmfile,int buffMb=512);
-
- void loadcenters(std::fstream& inp,int Order);
-
- double prob(ngram ng);
- double lprob(ngram ng);
- double clprob(ngram ng);
-
-
- void *search(char *tb,LMT_TYPE ndt,int lev,int n,int sz,int *w,
- LMT_ACTION action,char **found=(char **)NULL);
-
- int mybsearch(char *ar, int n, int size, unsigned char *key, int *idx);
-
- int add(ngram& ng,int prob,int bow);
- void checkbounds(int level);
-
- int get(ngram& ng){return get(ng,ng.size,ng.size);}
- int get(ngram& ng,int n,int lev);
-
- int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
-
- const char *maxsuffptr(ngram ong);
- const char *cmaxsuffptr(ngram ong);
-
- inline int putmem(char* ptr,int value,int offs,int size){
- assert(ptr!=NULL);
- for (int i=0;i<size;i++)
- ptr[offs+i]=(value >> (8 * i)) & 0xff;
- return value;
- };
-
- inline int getmem(char* ptr,int* value,int offs,int size){
- assert(ptr!=NULL);
- *value=ptr[offs] & 0xff;
- for (int i=1;i<size;i++)
- *value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i));
- return *value;
- };
-
-
- int bo_state(int value=-1){
- return (value==-1?backoff_state:backoff_state=value);
- };
-
-
- int nodesize(LMT_TYPE ndt){
- switch (ndt){
- case INTERNAL:
- return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
- case QINTERNAL:
- return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
- case QLEAF:
- return LMTCODESIZE + QPROBSIZE;
- case LEAF:
- return LMTCODESIZE + PROBSIZE;
- default:
- assert(0);
- return 0;
- }
- }
-
- inline int word(node nd,int value=-1)
- {
- int offset=0;
-
- if (value==-1)
- getmem(nd,&value,offset,LMTCODESIZE);
- else
- putmem(nd,value,offset,LMTCODESIZE);
-
- return value;
- };
-
- inline int prob(node nd,LMT_TYPE ndt, int value=-1)
- {
- int offs=LMTCODESIZE;
- int size=(ndt==QINTERNAL || ndt==QLEAF?QPROBSIZE:PROBSIZE);
-
- if (value==-1)
- getmem(nd,&value,offs,size);
- else
- putmem(nd,value,offs,size);
-
- return value;
- };
-
-
- inline int bow(node nd,LMT_TYPE ndt, int value=-1)
- {
- assert(ndt==INTERNAL || ndt==QINTERNAL);
- int size=(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
- int offs=LMTCODESIZE+size;
-
- if (value==-1)
- getmem(nd,&value,offs,size);
- else
- putmem(nd,value,offs,size);
-
- return value;
- };
-
- inline int bound(node nd,LMT_TYPE ndt, int value=-1)
- {
- assert(ndt==INTERNAL || ndt==QINTERNAL);
- int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-
- if (value==-1)
- getmem(nd,&value,offs,BOUNDSIZE);
- else
- putmem(nd,value,offs,BOUNDSIZE);
-
- return value;
- };
-
- void stat(int lev=0);
-
-};
-
-#endif
-
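The putmem/getmem members above pack integers little-endian into the node records, which is what lets a word code occupy LMTCODESIZE (3) bytes and a quantized probability index a single byte. A self-contained round-trip sketch of the same packing, written as free functions purely for illustration:

#include <cassert>
#include <cstdio>

// store the low `size` bytes of value at ptr[offs..offs+size), little-endian
static void putmem(char *ptr, int value, int offs, int size)
{
    for (int i = 0; i < size; i++)
        ptr[offs + i] = (value >> (8 * i)) & 0xff;
}

// read `size` bytes back into an int, little-endian
static int getmem(const char *ptr, int offs, int size)
{
    int value = ptr[offs] & 0xff;
    for (int i = 1; i < size; i++)
        value |= (ptr[offs + i] & 0xff) << (8 * i);
    return value;
}

int main()
{
    char node[8] = {0};
    putmem(node, 0x00123456, 0, 3);   // a 3-byte word code (LMTCODESIZE)
    putmem(node, 200,        3, 1);   // a 1-byte quantized prob index (QPROBSIZE)
    assert(getmem(node, 0, 3) == 0x00123456);
    assert(getmem(node, 3, 1) == 200);
    printf("packed node round-trips correctly\n");
    return 0;
}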
diff --git a/irstlm/src/mempool.cpp b/irstlm/src/mempool.cpp
deleted file mode 100644
index f7220b897..000000000
--- a/irstlm/src/mempool.cpp
+++ /dev/null
@@ -1,496 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// An efficient memory pool manager
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-using namespace std;
-
-#include <iostream>
-#include <cassert>
-#include <cstring> // memset, strcpy, strlen
-#include <cstdlib> // calloc, realloc, exit
-#include "mempool.h"
-
-#ifdef TRACE_ENABLE
-#define TRACE_ERR(str) { std::cerr << str; }
-#else
-#define TRACE_ERR(str) { }
-#endif
-
-/*! The pool contains:
- - entries of is bytes each
- - blocks holding bs entries each
-*/
-
-
-mempool::mempool(int is, int bs){
-
- // item size must be a multiple of the memory alignment step (sizeof(char *))
- // example: is=9 becomes is=12 on a 32-bit machine (9 + 4 - 9 % 4)
-
- is=(is>(int)sizeof(char *)?is:0);
-
- is=is + sizeof(char *) - (is % sizeof(char *));
-
- item_size = is;
-
- block_size = bs;
-
- true_size = is * bs;
-
- block_list = new memnode;
-
- block_list->block = new char[true_size];
-
- memset(block_list->block,'0',true_size);
-
- block_list->next = 0;
-
- blocknum = 1;
-
- entries = 0;
-
- // build free list
-
- char *ptr = free_list = block_list->block;
-
- for (int i=0;i<block_size-1;i++) {
- *(char **)ptr= ptr + item_size;
- ptr+=item_size;
- }
- *(char **)ptr = NULL; //last item
-
-}
-
-
-char * mempool::allocate(){
-
- char *ptr;
-
- if (free_list==NULL)
- {
- memnode *new_block = new memnode;
-
- new_block->block = new char[true_size];
-
- //memset(new_block->block,'0',true_size);
-
- new_block->next = block_list;
-
- block_list=new_block; // update block list
-
- /* update free list */
-
- ptr = free_list = block_list->block;
-
- for (int i=0;i<block_size-1;i++) {
- *(char **)ptr = ptr + item_size;
- ptr = ptr + item_size;
- }
-
- *(char **)ptr=NULL;
-
- blocknum++;
- }
-
- assert(free_list);
-
- ptr = free_list;
-
- free_list=*(char **)ptr;
-
- *(char **)ptr=NULL; // reset the released item
-
- entries++;
-
- return ptr;
-
-}
-
-
-int mempool::free(char* addr){
-
- // do not check if it belongs to this pool !!
- /*
- memnode *list=block_list;
- while ((list != NULL) &&
- ((addr < list->block) ||
- (addr >= (list->block + true_size))))
- list=list->next;
-
- if ((list==NULL) || (((addr - list->block) % item_size)!=0))
- {
- //cerr << "mempool::free-> addr does not belong to this pool\n";
- return 0;
- }
- */
-
- *(char **)addr=free_list;
- free_list=addr;
-
- entries--;
-
- return 1;
-}
-
-
-mempool::~mempool()
-{
- memnode *ptr;
-
- while (block_list !=NULL){
- ptr=block_list->next;
- delete [] block_list->block;
- delete block_list;
- block_list=ptr;
- }
-
-}
-
-void mempool::map (ostream& co){
-
- co << "mempool memory map:\n";
- //walk the free list once for each block
-
- memnode *bl=block_list;
- char *fl=free_list;
-
- char* img=new char[block_size+1];
- img[block_size]='\0';
-
- while (bl !=NULL){
-
- memset(img,'#',block_size);
-
- fl=free_list;
- while (fl != NULL){
- if ((fl >= bl->block)
- &&
- (fl < bl->block + true_size))
- {
- img[(fl-bl->block)/item_size]='-';
- }
-
- fl=*(char **)fl;
- }
-
- co << img << "\n";
- bl=bl->next;
- }
- delete [] img;
-}
-
-void mempool::stat(){
-
- TRACE_ERR("mempool class statistics\n"
- << "entries " << entries
- << " blocks " << blocknum
- << " used memory " << (blocknum * true_size)/1024 << " Kb\n");
-}
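A minimal usage sketch of the pool, assuming mempool.h and mempool.cpp are available to compile and link against: allocate() hands out one fixed-size slot, free() returns it to the free list, and map()/stat() report occupancy.

#include <iostream>
#include "mempool.h"

int main()
{
    // pool of 3-int items, 8 items per block
    mempool pool(3 * sizeof(int), 8);

    char *slots[20];
    for (int i = 0; i < 20; i++) {          // forces the pool to grow past one block
        slots[i] = pool.allocate();
        int *v = (int *)slots[i];
        v[0] = i; v[1] = i * i; v[2] = -i;  // use the slot as three ints
    }

    for (int i = 0; i < 20; i += 2)         // release every other slot
        pool.free(slots[i]);

    pool.map(std::cout);                    // '#' = used entry, '-' = free entry
    pool.stat();                            // prints only if TRACE_ENABLE is defined
    return 0;
}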
-
-
-
-strstack::strstack(int bs){
-
- size=bs;
- list=new memnode;
-
- list->block=new char[size];
-
- list->next=0;
-
- memset(list->block,'\0',size);
- idx=0;
-
- waste=0;
- memory=size;
- entries=0;
- blocknum=1;
-
-}
-
-
-void strstack::stat(){
-
- TRACE_ERR("strstack class statistics\n"
- << "entries " << entries
- << " blocks " << blocknum
- << " used memory " << memory/1024 << " Kb\n");
-}
-
-
-char *strstack::push(char *s){
- int len=strlen(s);
-
- if ((len+1) >= size){
- cerr << "strstack::push string is too long\n";
- exit(1);
- };
-
- if ((idx+len+1) >= size){
- //append a new block
- //there must be space to
- //put the index after
- //the word
-
- waste+=size-idx;
- blocknum++;
- memory+=size;
-
- memnode* nd=new memnode;
- nd->block=new char[size];
- nd->next=list;
-
- list=nd;
-
- memset(list->block,'\0',size);
-
- idx=0;
-
- }
-
- // append in current block
-
- strcpy(&list->block[idx],s);
-
- idx+=len+1;
-
- entries++;
-
- return &list->block[idx-len-1];
-
-}
-
-
-char *strstack::pop(){
-
- if (list==0) return 0;
-
- if (idx==0){
-
- // free this block and go to next
-
- memnode *ptr=list->next;
-
- delete [] list->block;
- delete list;
-
- list=ptr;
-
- if (list==0)
- return 0;
- else
- idx=size-1;
- }
-
- //go back to first non \0
- while (idx>0)
- if (list->block[idx--]!='\0')
- break;
-
- //go back to first \0
- while (idx>0)
- if (list->block[idx--]=='\0')
- break;
-
- entries--;
-
- if (list->block[idx+1]=='\0')
- {
- idx+=2;
- memset(&list->block[idx],'\0',size-idx);
- return &list->block[idx];
- }
- else{
- idx=0;
- memset(&list->block[idx],'\0',size);
- return &list->block[0];
- }
-}
-
-
-char *strstack::top(){
-
- int tidx=idx;
- memnode *tlist=list;
-
- if (tlist==0) return 0;
-
- if (idx==0){
-
- tlist=tlist->next;
-
- if (tlist==0) return 0;
-
- tidx=size-1;
- }
-
- //go back to first non \0
- while (tidx>0)
- if (tlist->block[tidx--]!='\0')
- break;
-
- //aaa\0bbb\0\0\0\0
-
- //go back to first \0
- while (tidx>0)
- if (tlist->block[tidx--]=='\0')
- break;
-
- if (tlist->block[tidx+1]=='\0')
- {
- tidx+=2;
- return &tlist->block[tidx];
- }
- else{
- tidx=0;
- return &tlist->block[0];
- }
-
-}
-
-
-strstack::~strstack(){
- memnode *ptr;
- while (list !=NULL){
- ptr=list->next;
- delete [] list->block;
- delete list;
- list=ptr;
- }
-}
-
-
-storage::storage(int maxsize,int blocksize)
-{
- newmemory=0;
- newcalls=0;
- setsize=maxsize;
- poolsize=blocksize; //in bytes
- poolset=new mempool* [setsize+1];
- for (int i=0;i<=setsize;i++)
- poolset[i]=NULL;
-}
-
-
-storage::~storage(){
- for (int i=0;i<=setsize;i++)
- if (poolset[i])
- delete poolset[i];
- delete [] poolset;
-}
-
-char *storage::allocate(int size){
-
- if (size<=setsize){
- if (!poolset[size]){
- poolset[size]=new mempool(size,poolsize/size);
- }
- return poolset[size]->allocate();
- }
- else{
-
- newmemory+=size+8;
- newcalls++;
- char* p=(char *)calloc(sizeof(char),size);
- if (p==NULL){
- cerr << "storage::alloc insufficient memory\n";
- exit(1);
- }
- return p;
- }
-}
-
-char *storage::reallocate(char *oldptr,int oldsize,int newsize){
-
- char *newptr;
-
- assert(newsize>oldsize);
-
- if (oldsize<=setsize){
- if (newsize<=setsize){
- if (!poolset[newsize])
- poolset[newsize]=new mempool(newsize,poolsize/newsize);
- newptr=poolset[newsize]->allocate();
- memset((char*)newptr,0,newsize);
- }
- else
- newptr=(char *)calloc(sizeof(char),newsize);
-
- if (oldptr && oldsize){
- memcpy(newptr,oldptr,oldsize);
- poolset[oldsize]->free(oldptr);
- }
- }
- else{
- newptr=(char *)realloc(oldptr,newsize);
- if (newptr==oldptr)
- cerr << "r\b";
- else
- cerr << "a\b";
- }
- if (newptr==NULL){
- cerr << "storage::realloc insufficient memory\n";
- exit(1);
- }
-
- return newptr;
-}
-
-int storage::free(char *addr,int size){
-
- /*
- while(size<=setsize){
- if (poolset[size] && poolset[size]->free(addr))
- break;
- size++;
- }
- */
-
- if (size>setsize)
- return ::free(addr),1; //large entries were obtained with calloc/realloc
- else{
- poolset[size] && poolset[size]->free(addr);
- }
- return 1;
-}
-
-
-void storage::stat(){
- int used=0;
- int memory=sizeof(char *) * setsize;
- int waste=0;
-
- for (int i=0;i<=setsize;i++)
- if (poolset[i]){
- used++;
- memory+=poolset[i]->used();
- waste+=poolset[i]->wasted();
- }
-
- TRACE_ERR("storage class statistics\n"
- << "alloc entries " << newcalls
- << " used memory " << newmemory/1024 << "Kb\n"
- << "mpools " << setsize
- << " active " << used
- << " used memory " << memory/1024 << "Kb"
- << " wasted " << waste/1024 << "Kb\n");
-}
-
diff --git a/irstlm/src/mempool.h b/irstlm/src/mempool.h
deleted file mode 100644
index eafdcd110..000000000
--- a/irstlm/src/mempool.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// An efficient memory manager
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-#ifndef MF_MEMPOOL_H
-#define MF_MEMPOOL_H
-
-#ifndef NULL
-const int NULL=0;
-//#define NULL=0;
-#endif
-
-#include <iostream> // std::ostream
-
-//! Memory block
-/*! This can be used by:
-- mempool to store items of fixed size
-- strstack to store strings of variable size
-*/
-
-class memnode{
- friend class mempool; //!< grant access
- friend class strstack; //!< grant access
- char *block; //!< block of memory
- memnode *next; //!< next block ptr
-};
-
-
-//! Memory pool
-
-/*! A memory pool is composed of:
- - a linked list of block_num memory blocks
- - each block might contain up to block_size items
- - each item is made of exactly item_size bytes
-*/
-
-class mempool{
- int block_size; //!< number of entries per block
- int item_size; //!< number of bytes per entry
- int true_size; //!< number of bytes per block
- memnode* block_list; //!< list of blocks
- char* free_list; //!< free entry list
- int entries; //!< number of stored entries
- int blocknum; //!< number of allocated blocks
- public:
-
- //! Creates a memory pool
- mempool(int is, int bs);
-
- //! Destroys memory pool
- ~mempool();
-
- //! Prints a map of memory occupancy
- void map(std::ostream& co);
-
- //! Allocates a single memory entry
- char *allocate();
-
- //! Frees a single memory entry
- int free(char* addr);
-
- //! Prints statistics about this mempool
- void stat();
-
- //! Returns effectively used memory (bytes)
- /*! includes 8 bytes required by each call of new */
-
- int used(){return blocknum * (true_size + 8);};
-
- //! Returns amount of wasted memory (bytes)
- int wasted(){return used()-(entries * item_size);};
-};
-
-//! A stack to store strings
-
-/*!
- The stack is composed of
- - a list of memnode blocks of fixed size
- - attribute blocknum tells the block on top
- - attribute idx tells position of the top string
-*/
-
-class strstack{
- memnode* list; //!< list of memory blocks
- int size; //!< size of each block
- int idx; //!< index of last stored string
- int waste; //!< current waste of memory
- int memory; //!< current use of memory
- int entries; //!< current number of stored strings
- int blocknum; //!< current number of used blocks
-
- public:
-
- strstack(int bs=1000);
-
- ~strstack();
-
- char *push(char *s);
-
- char *pop();
-
- char *top();
-
- void stat();
-
- int used(){return memory;};
-
- int wasted(){return waste;};
-
-};
-
-
-//! Manages multiple memory pools
-
-/*!
- This class manages memory pools
- with items up to a specified size.
- - items within the allowed range are stored in memory pools
- - items larger than the limit are allocated with new
-*/
-
-
-class storage{
- mempool **poolset; //!< array of memory pools
- int setsize; //!< number of memory pools/maximum elem size
- int poolsize; //!< size of each block
- int newmemory; //!< stores amount of used memory
- int newcalls; //!< stores number of allocated blocks
- public:
-
- //! Creates storage
- storage(int maxsize,int blocksize);
-
- //! Destroys storage
- ~storage();
-
- /* names of below functions have been changed so as not to interfere with macros for malloc/realloc/etc -- EVH */
-
- //! Allocates memory
- char *allocate(int size);
-
- //! Realloc memory
- char *reallocate(char *oldptr,int oldsize,int newsize);
-
- //! Frees memory of an entry
- int free(char *addr,int size=0);
-
- //! Prints statistics about storage
- void stat();
-};
-
-#endif
diff --git a/irstlm/src/n_gram.cpp b/irstlm/src/n_gram.cpp
deleted file mode 100644
index bf12b7015..000000000
--- a/irstlm/src/n_gram.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iomanip>
-#include <cassert>
-#include "mempool.h"
-#include "htable.h"
-#include "dictionary.h"
-#include "n_gram.h"
-#include "index.h"
-
-using namespace std;
-
-ngram::ngram(dictionary* d,int sz){
- dict=d;
- size=sz;
- succ=0;
- freq=0;
- info=0;
- pinfo=0;
- link=NULL;
- isym=-1;
- memset(word,0,sizeof(int)*MAX_NGRAM);
- memset(midx,0,sizeof(int)*MAX_NGRAM);
-}
-
-ngram::ngram(ngram& ng){
- size=ng.size;
- freq=ng.freq;
- succ=0;
- info=0;
- pinfo=0;
- link=NULL;
- isym=-1;
- dict=ng.dict;
- memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
- memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM);
-
-}
-
-void ngram::trans (const ngram& ng){
- size=ng.size;
- freq=ng.freq;
- if (dict == ng.dict){
- info=ng.info;
- isym=ng.isym;
- memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
- memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM);
- }
- else{
- info=0;
- memset(midx,0,sizeof(int)*MAX_NGRAM);
- isym=-1;
- for (int i=1;i<=size;i++)
- word[MAX_NGRAM-i]=dict->encode(ng.dict->decode(*ng.wordp(i)));
- }
-}
-
-
-ifstream& operator>> ( ifstream& fi , ngram& ng){
- char w[MAX_WORD];
- memset(w,0,MAX_WORD);
- w[0]='\0';
-
- if (!(fi >> setw(MAX_WORD) >> w))
- return fi;
-
- if (strlen(w)==(MAX_WORD-1))
- cerr << "ngram: a too long word was read ("
- << w << ")\n";
-
- if (ng.dict->intsymb() &&
- (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
-
- ng.isym=(long)index(ng.dict->intsymb(),w[0]) -
- (long)ng.dict->intsymb();
- ng.size=0;
- return fi;
- }
-
- int c=ng.dict->encode(w);
-
- if (c == -1 ){
- cerr << "ngram: " << w << " is OOV \n";
- exit(1);
- }
-
- memcpy(ng.word,ng.word+1,(MAX_NGRAM-1)*sizeof(int));
-
- ng.word[MAX_NGRAM-1]=(int)c;
- ng.freq=1;
-
- if (ng.size<MAX_NGRAM) ng.size++;
-
- return fi;
-
-}
-
-
-int ngram::pushw(char* w){
-
- assert(dict!=NULL);
-
- int c=dict->encode(w);
-
- if (c == -1 ){
- cerr << "ngram: " << w << " is OOV \n";
- exit(1);
- }
-
- pushc(c);
-
- return 1;
-
-}
-
-int ngram::pushc(int c){
-
- int buff[MAX_NGRAM-1];
- memcpy(buff,word+1,(MAX_NGRAM-1)*sizeof(int));
- memcpy(word,buff,(MAX_NGRAM-1)*sizeof(int));
-
- word[MAX_NGRAM-1]=(int)c;
- if (size<MAX_NGRAM) size++;
-
- return 1;
-
-}
-
-
-istream& operator>> ( istream& fi , ngram& ng){
- char w[MAX_WORD];
- memset(w,0,MAX_WORD);
- w[0]='\0';
-
- assert(ng.dict != NULL);
-
- if (!(fi >> setw(MAX_WORD) >> w))
- return fi;
-
- if (strlen(w)==(MAX_WORD-1))
- cerr << "ngram: a too long word was read ("
- << w << ")\n";
-
- if (ng.dict->intsymb() &&
- (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
- ng.isym=(long)index(ng.dict->intsymb(),w[0])-(long)ng.dict->intsymb();
- ng.size=0;
- return fi;
- }
-
- ng.pushw(w);
-
- ng.freq=1;
-
- return fi;
-
-}
-
-ofstream& operator<< (ofstream& fo,ngram& ng){
-
- assert(ng.dict != NULL);
-
- for (int i=ng.size;i>0;i--)
- fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " ";
- //fo << "[size " << ng.size << " freq " << ng.freq << "]";
- fo << ng.freq;
- return fo;
-}
-
-ostream& operator<< (ostream& fo,ngram& ng){
-
- assert(ng.dict != NULL);
-
- for (int i=ng.size;i>0;i--)
- fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " ";
- //fo << "[size " << ng.size << " freq " << ng.freq << "]";
- fo << ng.freq;
-
- return fo;
-}
-
-/*
-main(int argc, char** argv){
- dictionary d(argv[1]);
- ifstream txt(argv[1]);
- ngram ng(&d);
-
- while (txt >> ng){
- cout << ng << "\n";
- }
-
- ngram ng2=ng;
- cerr << "copia l'ultimo =" << ng << "\n";
-}
-*/
-
diff --git a/irstlm/src/n_gram.h b/irstlm/src/n_gram.h
deleted file mode 100644
index 12a885be0..000000000
--- a/irstlm/src/n_gram.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// n-gram tables
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-#ifndef MF_NGRAM_H
-#define MF_NGRAM_H
-
-#include <fstream>
-#include "dictionary.h"
-
-#ifdef MYMAXNGRAM
-#define MAX_NGRAM MYMAXNGRAM
-#else
-#define MAX_NGRAM 20
-#endif
-
-class dictionary;
-
-//typedef int code;
-
-class ngram{
- int word[MAX_NGRAM]; //encoded ngram
- public:
- dictionary *dict; //dictionary
- char* link; // ngram-tree pointer
- int midx[MAX_NGRAM]; // ngram-tree scan pointer
- int lev; // ngram-tree level
- int size; // ngram size
- int freq; // ngram frequency
- int succ; // number of successors
-
- unsigned char info; // ngram-tree info flags
- unsigned char pinfo; // ngram-tree parent info flags
- int isym; // last interruption symbol
-
- ngram(dictionary* d,int sz=0);
- ngram(ngram& ng);
-
- int *wordp()// n-gram pointer
- {return wordp(size);};
- int *wordp(int k) // n-gram pointer
- {return size>=k?&word[MAX_NGRAM-k]:0;};
- const int *wordp() const // n-gram pointer
- {return wordp(size);};
- const int *wordp(int k) const // n-gram pointer
- {return size>=k?&word[MAX_NGRAM-k]:0;};
-
- int shift(){
- for (int i=(MAX_NGRAM-1);i>0;i--){
- word[i]=word[i-1];
- }
- size--;
- return 1;
- }
-
-
- int containsWord(char* s,int lev){
-
- int c=dict->encode(s);
- if (c == -1) return 0;
-
- assert(lev <= size);
- for (int i=0;i<lev;i++){
- if (*wordp(size-i)== c) return 1;
- }
- return 0;
- }
-
-
- void trans(const ngram& ng);
-
- friend std::ifstream& operator>> (std::ifstream& fi,ngram& ng);
- friend std::ofstream& operator<< (std::ofstream& fi,ngram& ng);
- friend std::istream& operator>> (std::istream& fi,ngram& ng);
- friend std::ostream& operator<< (std::ostream& fi,ngram& ng);
-
- inline int ckhisto(int sz){
-
- for (int i=sz;i>1;i--)
- if (*wordp(i)==dict->oovcode())
- return 0;
- return 1;
- }
-
- int pushc(int c);
- int pushw(char* w);
-
- //~ngram();
-
-
-
-};
-
-#endif
-
-
-
diff --git a/irstlm/src/ngramcache.cpp b/irstlm/src/ngramcache.cpp
deleted file mode 100644
index 271be4cf1..000000000
--- a/irstlm/src/ngramcache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-#include <iostream>
-#include <fstream>
-#include <stdexcept>
-#include <cassert>
-#include "math.h"
-#include "mempool.h"
-#include "htable.h"
-
-#include "ngramcache.h"
-
-using namespace std;
-
-ngramcache::ngramcache(int n,int size,int maxentries){
- ngsize=n;
- infosize=size;
- maxn=maxentries;
- entries=0;
- ht=new htable(maxn * 2, ngsize * sizeof(int),INT,NULL); //lower load factor to reduce collisions
- mp=new mempool(ngsize * sizeof(int)+infosize,maxn/5);
- accesses=0;
- hits=0;
- };
-
-ngramcache::~ngramcache(){
- ht->stat();//ht->map();
- mp->stat();
- delete ht;delete mp;
-};
-
-
-void ngramcache::reset(){
- ht->stat();
- delete ht;delete mp;
- ht=new htable(maxn * 2, ngsize * sizeof(int),INT,NULL); //lower load factor to reduce collisions
- mp=new mempool(ngsize * sizeof(int)+infosize,maxn/5);
- entries=0;
- }
-
-
-char* ngramcache::get(const int* ngp,char* info){
- char *found;
- // cout << "ngramcache::get() ";
- //for (int i=0;i<ngsize;i++) cout << ngp[i] << " "; cout <<"\n";
- accesses++;
- if ((found=ht->search((char *)ngp,HT_FIND))!=NULL){
- if (info) memcpy(info,found+ngsize*sizeof(int),infosize);
- hits++;
- };
- return found;
- };
-
-
-int ngramcache::add(const int* ngp,const char* info){
-
- char* entry=mp->allocate();
- memcpy(entry,(char*) ngp,sizeof(int) * ngsize);
- memcpy(entry + ngsize * sizeof(int),(char *)info,infosize);
- char *found=ht->search((char *)entry,HT_ENTER);
- assert(found == entry); //fails if the key was already inside the table
- entries++;
- return 1;
- }
-
-void ngramcache::stat(){
- cerr << "ngramcache stats: entries=" << entries << " acc=" << accesses << " hits=" << hits << "\n";
-};
diff --git a/irstlm/src/ngramcache.h b/irstlm/src/ngramcache.h
deleted file mode 100644
index 6f57cc926..000000000
--- a/irstlm/src/ngramcache.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#ifndef MF_NGRAMCACHE_H
-#define MF_NGRAMCACHE_H
-
-#include "mempool.h"
-#include "htable.h"
-
-class ngramcache{
-private:
- htable* ht;
- mempool *mp;
- int maxn;
- int ngsize;
- int infosize;
- int accesses;
- int hits;
- int entries;
-
-public:
-
- ngramcache(int n,int size,int maxentries);
- ~ngramcache();
- void reset();
- char* get(const int* ngp,char* info=NULL);
- int add(const int* ngp,const char* info);
- int isfull(){return (entries >= maxn);};
- void stat();
-};
-
-#endif
-
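-// Minimal usage sketch (illustrative; 'key', 'pr' and 'computeProb' are
-// hypothetical names): cache float probabilities of 3-grams of encoded words.
-//   ngramcache cache(3, sizeof(float), 1000000);
-//   int key[3] = {w1, w2, w3};            // encoded word ids
-//   float pr;
-//   if (!cache.get(key, (char*)&pr)) {    // miss: compute and store
-//     pr = computeProb(key);
-//     if (!cache.isfull()) cache.add(key, (const char*)&pr);
-//   }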
diff --git a/irstlm/src/quantize-lm.cpp b/irstlm/src/quantize-lm.cpp
deleted file mode 100644
index ee122b351..000000000
--- a/irstlm/src/quantize-lm.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-/******************************************************************************
-IrstLM: IRST Language Model Toolkit, compile LM
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-
-#include "math.h"
-#include "lmtable.h"
-
-using namespace std;
-
-//----------------------------------------------------------------------
-// Special type and global variable for the BIN CLUSTERING algorithm
-//
-//
-//----------------------------------------------------------------------
-
-typedef struct{
- double pt;
- int idx;
- short code;
-}BinEntry;
-
-
-int cmpBinEntry(const void* a,const void* b){
- if (*(double *)a > *(double*)b)
- return 1;
- else if (*(double *)a < *(double*)b)
- return -1;
- else
- return 0;
-}
-
-BinEntry* bintable=NULL;
-
-//----------------------------------------------------------------------
-// Global entry points
-//----------------------------------------------------------------------
-
-int parseWords(char *sentence, char **words, int max);
-
-int ComputeCluster(int nc, double* cl,int N,double* Pts);
-
-//----------------------------------------------------------------------
-// Global parameters (some are set in getArgs())
-//----------------------------------------------------------------------
-
-int k = 256; // number of centers
-const int MAXLEV = 11; //maximum n-gram size
-
-//----------------------------------------------------------------------
-// Main program
-//----------------------------------------------------------------------
-
-void usage(const char *msg = 0) {
- if (msg) { std::cerr << msg << std::endl; }
- std::cerr << "Usage: quantize-lm input-file.lm [output-file.qlm]" << std::endl;
- if (!msg) std::cerr << std::endl
- << " quantize-lm reads a standard LM file in ARPA format and produces" << std::endl
- << " a version of it with quantized probabilities and back-off weights"<< std::endl
- << " that the IRST LMtoolkit can compile." << std::endl;
- }
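-
-// Invocation sketch (illustrative): "quantize-lm file.lm file.qlm" reads the
-// ARPA file and writes a qARPA file in which each probability and back-off
-// weight is replaced by the index of one of k (default 256) bin centers
-// computed per n-gram level.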
-
-
-int main(int argc, const char **argv)
-{
-
- //Process Parameters
-
- if (argc < 2) { usage(); exit(1); }
- std::vector<std::string> files;
- for (int i=1; i < argc; i++) {
- std::string opt = argv[i];
- files.push_back(opt);
- }
- if (files.size() > 2) { usage("Too many arguments"); exit(1); }
- if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
-
-
- std::string infile = files[0];
- if (files.size() == 1) {
- std::string::size_type p = infile.rfind('/');
- if (p != std::string::npos && ((p+1) < infile.size())) {
- files.push_back(infile.substr(p+1) + ".qlm");
- } else {
- files.push_back(infile + ".qlm");
- }
- }
-
-
- std::cout << "Reading " << infile << "..." << std::endl;
-
- std::fstream inp(infile.c_str());
- if (!inp.good()) {
- std::cerr << "Failed to open " << infile << "!\n";
- exit(1);
- }
-
- std::string outfile = files[1];
- std::ofstream out(outfile.c_str());
- std::cout << "Writing " << outfile << "..." << std::endl;
-
- int nPts = 0; // actual number of points
-
- // *** Read ARPA FILE **
-
- int numNgrams[MAXLEV + 1]; /* # n-grams for each order */
- int Order=0,MaxOrder=0;
- int n;
-
- float logprob,logbow, logten=log(10.0);
-
- double* dataPts=NULL;
- double* centersP=NULL; double* centersB=NULL;
-
- int* mapP=NULL; int* mapB=NULL;
-
- int centers=k;
- streampos iposition;
-
- out << "qARPA\n"; //print output header
-
- for (int i=1;i<=MAXLEV;i++) numNgrams[i]=0;
-
- char line[1024];
-
- while (inp.getline(line,1024)){
-
- bool backslash = (line[0] == '\\');
-
- if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) {
- numNgrams[Order] = n;
- MaxOrder=Order;
- }
-
- if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) {
-
- out << line << "\n";
- cerr << "-- Start processing of " << Order << "-grams\n";
- assert(Order <= MAXLEV);
-
- int N=numNgrams[Order];
- centers=k;
- if (Order==1) centers=256; // always use 256 centers
-
- char* words[MAXLEV+3];
- dataPts=new double[N]; // allocate data
-
- iposition=inp.tellg();
-
- for (nPts=0;nPts<N;nPts++){
- inp.getline(line,1024);
- int howmany = parseWords(line, words, Order + 3);
- assert(howmany == Order+2 || howmany == Order+1);
- sscanf(words[0],"%f",&logprob);
- dataPts[nPts]=exp(logprob * logten);
- }
-
- cerr << "quantizing " << N << " probabilities\n";
-
- centersP=new double[centers];
- mapP=new int[N];
-
- ComputeCluster(centers,centersP,N,dataPts);
-
-
- assert(bintable !=NULL);
- for (int p=0;p<N;p++){
- mapP[bintable[p].idx]=bintable[p].code;
- }
-
- if (Order<MaxOrder){
-
- inp.seekg(iposition);
-
- for (nPts=0;nPts<N;nPts++){
- inp.getline(line,1024);
- int howmany = parseWords(line, words, Order + 3);
- if (howmany==Order+2) //backoff is written
- sscanf(words[Order+1],"%f",&logbow);
- else
- logbow=0; // backoff is implicit
- dataPts[nPts]=exp(logbow * logten);
- }
-
- centersB=new double[centers];
- mapB=new int[N];
-
- cerr << "quantizing " << N << " backoff weights\n";
- ComputeCluster(centers,centersB,N,dataPts);
-
- assert(bintable !=NULL);
- for (int p=0;p<N;p++){
- mapB[bintable[p].idx]=bintable[p].code;
- }
-
- }
-
- inp.seekg(iposition);
-
- out << centers << "\n";
- for (nPts=0;nPts<centers;nPts++){
- out << log(centersP[nPts])/logten;
- if (Order<MaxOrder) out << " " << log(centersB[nPts])/logten;
- out << "\n";
- }
-
- for (nPts=0;nPts<numNgrams[Order];nPts++){
-
- inp.getline(line,1024);
-
- parseWords(line, words, Order + 3);
-
- out << mapP[nPts];
-
- for (int i=1;i<=Order;i++) out << "\t" << words[i];
-
- if (Order < MaxOrder) out << "\t" << mapB[nPts];
-
- out << "\n";
-
- }
-
- if (mapP){delete [] mapP;mapP=NULL;}
- if (mapB){delete [] mapB;mapB=NULL;}
-
- if (centersP){delete [] centersP; centersP=NULL;}
- if (centersB){delete [] centersB; centersB=NULL;}
-
- delete [] dataPts;
-
- continue;
-
-
- }
-
- out << line << "\n";
- }
-
- cerr << "---- done\n";
-
- out.flush();
- inp.flush();
-
- out.close();
- inp.close();
-
-}
-
-// Compute Clusters
-
-int ComputeCluster(int centers,double* ctrs,int N,double* dataPts){
-
-
- //cerr << "\nExecuting Clutering Algorithm: k=" << centers<< "\n";
-
- if (bintable) delete [] bintable;
-
- bintable=new BinEntry[N];
- for (int i=0;i<N;i++){
- bintable[i].pt=dataPts[i];
- bintable[i].idx=i;
- bintable[i].code=0;
- }
-
- //cout << "start sort \n";
- qsort(bintable,N,sizeof(BinEntry),cmpBinEntry);
-
- int different=1;
-
- for (int i=1;i<N;i++)
- if (bintable[i].pt!=bintable[i-1].pt)
- different++;
-
- int interval=different/centers;
- if (interval==0) interval++;
-
- int* population=new int[centers];
- int* species=new int[centers];
-
- //cout << " Different entries=" << different
- // << " Total Entries=" << N << " Bin Size=" << interval << "\n";
-
- for (int i=0;i<centers;i++){
- population[i]=species[i]=0;
- ctrs[i]=0.0;
- }
-
- // initial values
- bintable[0].code=0;
- population[0]=1;
- species[0]=1;
-
- int currcode=0;
- different=1;
-
- for (int i=1;i<N;i++){
-
- if ((bintable[i].pt!=bintable[i-1].pt)){
- different++;
- if ((different % interval) == 0)
- if ((currcode+1) < centers
- &&
- population[currcode]>0){
- currcode++;
- }
- }
-
- if (bintable[i].pt == bintable[i-1].pt)
- bintable[i].code=bintable[i-1].code;
- else{
- bintable[i].code=currcode;
- species[currcode]++;
- }
-
- population[bintable[i].code]++;
-
- assert(bintable[i].code < centers);
-
- ctrs[bintable[i].code]+=bintable[i].pt;
-
- }
-
-
- for (int i=0;i<centers;i++){
- if (population[i]>0)
- ctrs[i]/=(float)population[i];
- //cout << i << " population " << population[i] << " species " << species[i] <<"\n";
- }
-
- cout.flush();
-
- delete [] population;
- delete [] species;
-
- return 1;
-
-}
-
-//----------------------------------------------------------------------
-// Reading/Printing utilities
-// readPt - read a point from input stream into data storage
-// at position i. Returns false on error or EOF.
-// printPt - prints a points to output file
-//----------------------------------------------------------------------
-
-
-int parseWords(char *sentence, char **words, int max)
-{
- char *word;
- int i = 0;
-
- const char *const wordSeparators = " \t\r\n";
-
- for (word = strtok(sentence, wordSeparators);
- i < max && word != 0;
- i++, word = strtok(0, wordSeparators))
- {
- words[i] = word;
- }
- if (i < max) {
- words[i] = 0;
- }
-
- return i;
-}
-
-
- \ No newline at end of file
diff --git a/misc/.project b/misc/.project
deleted file mode 100644
index 8da09013f..000000000
--- a/misc/.project
+++ /dev/null
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>misc</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- </buildSpec>
- <natures>
- </natures>
-</projectDescription>
diff --git a/misc/GenerateTuples.cpp b/misc/GenerateTuples.cpp
deleted file mode 100644
index 97b0bdd96..000000000
--- a/misc/GenerateTuples.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-
-////////////////////////////////////////////////////////////
-//
-// generate set of target candidates for confusion net
-//
-////////////////////////////////////////////////////////////
-
-
-
-#include <numeric>
-#include "Word.h"
-#include "Phrase.h"
-#include "ConfusionNet.h"
-#include "WordsRange.h"
-#include "PhraseDictionaryTree.h"
-#if 0
-// Generates all tuples of num_idx indexes, where index j ranges over 0 .. ranges[j]-1.
-// Input: number of indexes and ranges: ranges[0] ... ranges[num_idx-1]
-// Output: number of tuples and monodimensional array of tuples.
-// Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2)
-
-size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples)
-{
- unsigned* single_tuple= new unsigned[num_idx+1];
- unsigned num_tuples=1;
-
- for (unsigned k=0;k<num_idx;++k)
- {
- num_tuples *= ranges[k];
- single_tuple[k]=0;
- }
-
- tuples=new unsigned[num_idx * num_tuples];
-
- // we need this additional element for the last iteration
- single_tuple[num_idx]=0;
- unsigned j=0;
- for (unsigned n=0;n<num_tuples;++n){
- memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(unsigned));
- j=0;
- while (single_tuple[j]==ranges[j]-1){single_tuple[j]=0; ++j;}
- ++single_tuple[j];
- }
- delete [] single_tuple;
- return num_tuples;
-}
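-
-// Worked example (illustrative): with num_idx=2 and ranges={2,3} the call
-//   unsigned ranges[2]={2,3}; unsigned* tuples=0;
-//   size_t n=GenerateTuples(2,ranges,tuples);   // n==6
-// fills tuples with (0,0) (1,0) (0,1) (1,1) (0,2) (1,2): the first index
-// varies fastest, as in mixed-radix counting. The caller owns tuples and
-// should release it with delete [] tuples.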
-
-
-typedef PhraseDictionaryTree::PrefixPtr PPtr;
-typedef std::vector<PPtr> vPPtr;
-typedef std::vector<std::vector<Factor const*> > mPhrase;
-
-std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
- for(size_t i=0;i<p.size();++i) {
- out<<i<<" - ";
- for(size_t j=0;j<p[i].size();++j)
- out<<p[i][j]->ToString()<<" ";
- out<<"|";
- }
-
- return out;
-}
-
-struct State {
- vPPtr ptrs;
- WordsRange range;
- float score;
-
- State() : range(0,0),score(0.0) {}
- State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {}
-
- size_t begin() const {return range.GetStartPos();}
- size_t end() const {return range.GetEndPos();}
- float GetScore() const {return score;}
-
-};
-
-std::ostream& operator<<(std::ostream& out,const State& s) {
- out<<"["<<s.ptrs.size()<<" ("<<s.begin()<<","<<s.end()<<") "<<s.GetScore()<<"]";
-
- return out;
-}
-
-typedef std::map<mPhrase,float> E2Costs;
-
-
-struct GCData {
- const std::vector<PhraseDictionaryTree const*>& pdicts;
- const std::vector<std::vector<float> >& weights;
- std::vector<FactorType> inF,outF;
- size_t distinctOutputFactors;
- vPPtr root;
- size_t totalTuples,distinctTuples;
-
-
- GCData(const std::vector<PhraseDictionaryTree const*>& a,
- const std::vector<std::vector<float> >& b)
- : pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
-
- assert(pdicts.size()==weights.size());
- std::set<FactorType> distinctOutFset;
- inF.resize(pdicts.size());
- outF.resize(pdicts.size());
- root.resize(pdicts.size());
- for(size_t i=0;i<pdicts.size();++i)
- {
- root[i]=pdicts[i]->GetRoot();
- inF[i]=pdicts[i]->GetInputFactorType();
- outF[i]=pdicts[i]->GetOutputFactorType();
- distinctOutFset.insert(pdicts[i]->GetOutputFactorType());
- }
- distinctOutputFactors=distinctOutFset.size();
- }
-
- FactorType OutFT(size_t i) const {return outF[i];}
- FactorType InFT(size_t i) const {return inF[i];}
- size_t DistinctOutFactors() const {return distinctOutputFactors;}
-
- const vPPtr& GetRoot() const {return root;}
-
-};
-
-typedef std::vector<Factor const*> vFactor;
-typedef std::vector<std::pair<float,vFactor> > TgtCandList;
-
-typedef std::vector<TgtCandList> OutputFactor2TgtCandList;
-typedef std::vector<OutputFactor2TgtCandList*> Len2Cands;
-
-void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands)
-{
- std::vector<FactorTgtCand> cands;
- data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
-
- for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin();cand!=cands.end();++cand) {
- assert(data.weights[factorType].size()==cand->second.size());
- float costs=std::inner_product(data.weights[factorType].begin(),
- data.weights[factorType].end(),
- cand->second.begin(),
- 0.0);
-
- size_t len=cand->first.size();
- if(len>=len2cands.size()) len2cands.resize(len+1,0);
- if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
- OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
-
- outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
- }
-}
-
-void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data)
-{
- // check if candidates are non-empty
- bool gotCands=1;
- for(size_t j=0;gotCands && j<tCand.size();++j)
- gotCands &= !tCand[j].empty();
-
- if(gotCands) {
- // enumerate tuples
- assert(data.DistinctOutFactors()==tCand.size());
- std::vector<unsigned> radix(data.DistinctOutFactors());
- for(size_t i=0;i<tCand.size();++i) radix[i]=tCand[i].size();
-
- unsigned *tuples=0;
- size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
-
- data.totalTuples+=numTuples;
-
- for(size_t i=0;i<numTuples;++i)
- {
- mPhrase e(radix.size());float costs=0.0;
- for(size_t j=0;j<radix.size();++j)
- {
- assert(tuples[radix.size()*i+j]<tCand[j].size());
- std::pair<float,vFactor> const& mycand=tCand[j][tuples[radix.size()*i+j]];
- e[j]=mycand.second;
- costs+=mycand.first;
- }
-#ifdef DEBUG
- bool mismatch=0;
- for(size_t j=1;!mismatch && j<e.size();++j)
- if(e[j].size()!=e[j-1].size()) mismatch=1;
- assert(mismatch==0);
-#endif
- std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
- if(p.second) ++data.distinctTuples;
- else {
- // entry known, take min of costs, alternative: sum probs
- if(costs<p.first->second) p.first->second=costs;
- }
- }
- delete [] tuples;
- }
-}
-
-void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data)
-{
- Len2Cands len2cands;
- // generate candidates for each element of nextP:
- for(size_t factorType=0;factorType<nextP.size();++factorType)
- if(nextP[factorType])
- GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
-
- // for each length: enumerate tuples, compute score, and insert in e2costs
- for(size_t len=0;len<len2cands.size();++len) if(len2cands[len])
- GenerateTupleTgtCands(*len2cands[len],e2costs,data);
-}
-
-void GenerateCandidates(const ConfusionNet& src,
- const std::vector<PhraseDictionaryTree const*>& pdicts,
- const std::vector<std::vector<float> >& weights,
- int verbose) {
- GCData data(pdicts,weights);
-
- std::vector<State> stack;
- for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
-
- std::map<WordsRange,E2Costs> cov2E;
-
- // std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
-
- while(!stack.empty())
- {
- State curr(stack.back());
- stack.pop_back();
-
- //std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
-
- assert(curr.end()<src.GetSize());
- const ConfusionNet::Column &currCol=src[curr.end()];
- for(size_t colidx=0;colidx<currCol.size();++colidx)
- {
- const Word& w=currCol[colidx].first;
- vPPtr nextP(curr.ptrs);
- for(size_t j=0;j<nextP.size();++j)
- nextP[j]=pdicts[j]->Extend(nextP[j],
- w.GetFactor(data.InFT(j))->GetString());
-
- bool valid=1;
- for(size_t j=0;j<nextP.size();++j) if(!nextP[j]) {valid=0;break;}
-
- if(valid)
- {
- if(curr.end()+1<src.GetSize())
- stack.push_back(State(curr.begin(),curr.end()+1,nextP,
- curr.GetScore()+currCol[colidx].second));
-
- E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
- GenerateCandidates_(e2costs,nextP,data);
- }
- }
-
- // check if there are translations of one-word phrases ...
- //if(curr.begin()==curr.end() && tCand.empty()) {}
-
- } // end while(!stack.empty())
-
- if(verbose) {
- // print statistics for debugging purposes
- std::cerr<<"tuple stats: total: "<<data.totalTuples
- <<" distinct: "<<data.distinctTuples<<" ("
- <<(data.distinctTuples/(0.01*data.totalTuples))
- <<"%)\n";
- std::cerr<<"per coverage set:\n";
- for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
- i!=cov2E.end();++i) {
- std::cerr<<i->first<<" -- distinct cands: "
- <<i->second.size()<<"\n";
- }
- std::cerr<<"\n\n";
- }
-
- if(verbose>10) {
- std::cerr<<"full list:\n";
- for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
- i!=cov2E.end();++i) {
- std::cerr<<i->first<<" -- distinct cands: "
- <<i->second.size()<<"\n";
- for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
- std::cerr<<j->first<<" -- "<<j->second<<"\n";
- }
- }
-}
-
-#else
-
-void GenerateCandidates(const ConfusionNet&,
- const std::vector<PhraseDictionaryTree const*>&,
- const std::vector<std::vector<float> >&,
- int)
-{
- std::cerr<<"ERROR: GenerateCandidates is currently broken\n";
-}
-
-#endif
diff --git a/misc/GenerateTuples.h b/misc/GenerateTuples.h
deleted file mode 100644
index 362c1534f..000000000
--- a/misc/GenerateTuples.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// $Id$
-#ifndef GENERATETUPLES_H_
-#define GENERATETUPLES_H_
-#include "PhraseDictionaryTree.h"
-
-class ConfusionNet;
-
-void GenerateCandidates(const ConfusionNet& src,
- const std::vector<PhraseDictionaryTree const*>& pdicts,
- const std::vector<std::vector<float> >& weights,
- int verbose=0) ;
-#endif
diff --git a/misc/Makefile b/misc/Makefile
deleted file mode 100644
index ab038611e..000000000
--- a/misc/Makefile
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-BOOSTDIR=/home/ws06/cdyer/boost-stage
-SRIDIR=/home/ws06/cdyer/srilm/lib/i686
-CXX=g++
-CXXFLAGS=-W -Wall -O0 -g -ggdb -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
-LDFLAGS=-static
-INCLUDES=-I../moses/src -I$(BOOSTDIR) -I$(BOOSTDIR)/include
-BOOSTLIBS=-L$(BOOSTDIR)/lib -L$(BOOSTDIR)/stage/lib -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz
-SRILIBS=-L$(SRIDIR) -loolm -ldstruct -lmisc
-
-default: processPhraseTable
-
-%.o: %.cpp
- $(CXX) $(CXXFLAGS) $(INCLUDES) $< -c -o $@
-
-
-
-MOSESLIB =../moses/src/libmoses.a
-
-processPhraseTable: processPhraseTable.o GenerateTuples.o $(MOSESLIB)
- $(CXX) $(LDFLAGS) $^ -o $@ $(SRILIBS) $(BOOSTLIBS)
-
-
-
-
diff --git a/misc/java-utils/.classpath b/misc/java-utils/.classpath
deleted file mode 100644
index 233be1d2c..000000000
--- a/misc/java-utils/.classpath
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
- <classpathentry kind="src" path=""/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
- <classpathentry kind="output" path=""/>
-</classpath>
diff --git a/misc/java-utils/.cvsignore b/misc/java-utils/.cvsignore
deleted file mode 100644
index 6b468b62a..000000000
--- a/misc/java-utils/.cvsignore
+++ /dev/null
@@ -1 +0,0 @@
-*.class
diff --git a/misc/java-utils/.project b/misc/java-utils/.project
deleted file mode 100644
index 7f927621f..000000000
--- a/misc/java-utils/.project
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>java-utils</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.jdt.core.javabuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.jdt.core.javanature</nature>
- </natures>
-</projectDescription>
diff --git a/misc/java-utils/CombineTags.java b/misc/java-utils/CombineTags.java
deleted file mode 100644
index 207791682..000000000
--- a/misc/java-utils/CombineTags.java
+++ /dev/null
@@ -1,89 +0,0 @@
-// $Id$
-
-import java.io.*;
-import java.util.*;
-
-// create sentences with all features combined from files with individual tags
-class CombineTags
-{
- public static void main(String[] args) throws Exception
- {
- System.err.println("Starting...");
-
- Vector vecInstream = new Vector();
- for (int i = 0 ; i < args.length ; i++)
- {
- InputStreamReader temp = new InputStreamReader(new FileInputStream(args[i]), "Latin1");
- BufferedReader inStream = new BufferedReader(temp);
- vecInstream.add(inStream);
- }
- OutputStreamWriter outStream = new OutputStreamWriter((OutputStream)System.out, "Latin1");
-
- new CombineTags(vecInstream, outStream);
-
- System.err.println("End...");
- }
-
- public CombineTags(Vector vecInstream , OutputStreamWriter outStream) throws Exception
- {
- BufferedReader inFile = (BufferedReader) vecInstream.get(0);
- String inLine;
- while ((inLine = inFile.readLine()) != null)
- {
- Vector phrases = new Vector();
-
- // do 1st stream
- Vector phrase = new Vector();
- StringTokenizer st = new StringTokenizer(inLine);
- while (st.hasMoreTokens())
- {
- String tag = st.nextToken();
- phrase.add(tag);
- }
- phrases.add(phrase);
-
- // read other stream
- for (int i = 1 ; i < vecInstream.size() ; i++)
- {
- BufferedReader otherFile = (BufferedReader) vecInstream.get(i);
- String otherLine = otherFile.readLine();
- StringTokenizer otherSt = new StringTokenizer(otherLine);
- Vector otherPhrase = new Vector();
-
- while (otherSt.hasMoreTokens())
- {
- String tag = otherSt.nextToken();
- otherPhrase.add(tag);
- }
- phrases.add(otherPhrase);
- }
-
- // combine
- phrase = (Vector) phrases.get(0);
-
- for (int pos = 0 ; pos < phrase.size() ; pos++)
- {
- String outLine = (String) phrase.get(pos) + "|";
-
- for (int stream = 1 ; stream < phrases.size() ; stream++)
- {
- Vector otherPhrase = (Vector) phrases.get(stream);
- String otherTag;
- if (otherPhrase.size() <= pos)
- otherTag = (String) otherPhrase.get(0);
- else
- otherTag = (String) otherPhrase.get(pos);
- outLine += otherTag + "|";
- }
- outLine = outLine.substring(0, outLine.length() - 1) + " ";
- outStream.write(outLine);
- }
- outStream.write("\n");
- }
- // close stream
- outStream.flush();
- outStream.close();
- outStream = null;
- }
-}
-
diff --git a/misc/java-utils/ProcessShallowParse.java b/misc/java-utils/ProcessShallowParse.java
deleted file mode 100644
index 77b789b20..000000000
--- a/misc/java-utils/ProcessShallowParse.java
+++ /dev/null
@@ -1,82 +0,0 @@
-// $Id$
-
-
-import java.io.*;
-import java.util.*;
-
-// input: sentences with all features combined
-// output: sentences combining morphology, lopar tags and parsed tags
-// used to create the generation table
-public class ProcessShallowParse
-{
- public static void main(String[] args) throws Exception
- {
- System.err.println("Starting...");
-
- InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
- , "Latin1");
- OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
- , "Latin1");
-
- new ProcessShallowParse2(inStream, outStream);
-
- System.err.println("End...");
- }
-}
-
-class ProcessShallowParse2
-{ // factored sentence
-
- public ProcessShallowParse2(Reader inStream, Writer outStream) throws Exception
- {
- BufferedReader inFile = new BufferedReader(inStream);
- BufferedWriter outFile = new BufferedWriter(outStream);
-
- // tokenise
- String inLine;
- int i = 1;
- while ((inLine = inFile.readLine()) != null)
- {
- StringTokenizer st = new StringTokenizer(inLine);
- String ret = "";
- while (st.hasMoreTokens())
- {
- String factoredWord = st.nextToken();
- ret += Output(factoredWord);
- }
- outFile.write(ret + "\n");
- i++;
- }
- outFile.flush();
- outFile.close();
- outFile = null;
- System.err.print("no of lines = " + i);
- }
-
- protected String Output(String factoredWord) throws Exception
- {
- StringTokenizer st = new StringTokenizer(factoredWord, "|");
-
- String surface = st.nextToken();
- String posNormal = st.nextToken();
- String morph = st.nextToken();
- String posImproved = st.nextToken();
- String ret = "";
-
- if (posImproved.equals("ART-SB")
- || posImproved.equals("NN-NK_NP-SB"))
- {
- ret = posImproved + "_" + morph + " ";
- }
- else if (posImproved.equals("???"))
- {
- ret = "??? ";
- }
- else
- {
- ret = surface + " ";
- }
-
- return ret;
- }
-}
diff --git a/misc/java-utils/ShrinkSentence.java b/misc/java-utils/ShrinkSentence.java
deleted file mode 100644
index e69fc6b1c..000000000
--- a/misc/java-utils/ShrinkSentence.java
+++ /dev/null
@@ -1,48 +0,0 @@
-// $Id$
-
-import java.io.*;
-import java.util.*;
-
-//used to create language model
-public class ShrinkSentence
-{
- public static void main(String[] args) throws Exception
- {
- System.err.println("Starting...");
-
- InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
- , "Latin1");
- OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
- , "Latin1");
-
- new ShrinkSentence(inStream, outStream);
-
- System.err.println("End...");
- }
-
- public ShrinkSentence(Reader inStream, Writer outStream) throws Exception
- {
- BufferedReader inFile = new BufferedReader(inStream);
- BufferedWriter outFile = new BufferedWriter(outStream);
-
- // tokenise
- String inLine;
- int i = 1;
- while ((inLine = inFile.readLine()) != null)
- {
- StringTokenizer st = new StringTokenizer(inLine);
- while (st.hasMoreTokens())
- {
- String word = st.nextToken();
- if (!word.equals("???"))
- outFile.write(word + " ");
- }
- outFile.write("\n");
- i++;
- }
- outFile.flush();
- outFile.close();
- outFile = null;
- System.err.print("no of lines = " + i);
- }
-} \ No newline at end of file
diff --git a/misc/java-utils/TagHierarchy.java b/misc/java-utils/TagHierarchy.java
deleted file mode 100644
index cdec14948..000000000
--- a/misc/java-utils/TagHierarchy.java
+++ /dev/null
@@ -1,135 +0,0 @@
-// $Id$
-
-import java.io.*;
-import java.util.*;
-
-// create pos-tag sentences from LISP-like input tree.
-// NN-NK tag is augmented to NN-NK_NP-SB if its parent is NP-SB
-class TagHierarchy
-{
- public static void main(String[] args) throws Exception
- {
- System.err.println("Starting...");
-
- InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
- , "Latin1");
- OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
- , "Latin1");
-
- new TagHierarchy(inStream, outStream);
-
- System.err.println("End...");
- }
-
- public TagHierarchy(Reader inStream, OutputStreamWriter outStream) throws Exception
- {
- BufferedReader inFile = new BufferedReader(inStream);
- BufferedWriter outFile = new BufferedWriter(outStream);
-
- // tokenise
- String inLine;
- int nullLines = 0;
- while ((inLine = inFile.readLine()) != null)
- {
- if (inLine.equals("null"))
- {
- nullLines++;
- outFile.write("null\n");
- }
- else
- {
- OutputHierarchy2(inLine, outFile);
- }
- }
- outFile.flush();
- outFile.close();
- outFile = null;
- System.err.println(nullLines + " null lines\n");
- }
-
- // indent parsed tree to make it easier to look at
- public void OutputHierarchy(String inLine, BufferedWriter outFile) throws Exception
- {
- int level = 0;
- StringTokenizer st = new StringTokenizer(inLine);
- while (st.hasMoreTokens())
- {
- String parsed = st.nextToken();
- if (parsed.substring(0, 1).compareTo("(") == 0)
- { // start of new node
- outFile.write('\n');
- for (int currLevel = 0 ; currLevel < level ; currLevel++)
- {
- outFile.write(' ');
- }
- String tag = parsed.substring(1, parsed.length());
- outFile.write(tag);
- level++;
- }
- else
- { // closing nodes
- int firstBracket = parsed.indexOf(')');
- int noBracket = parsed.length() - firstBracket;
- String tag = parsed.substring(0, firstBracket);
- outFile.write(" == " + tag);
- level -= noBracket;
- }
- }
- outFile.write('\n');
- }
-
- public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
- {
- int level = 0;
- Stack prevTags = new Stack();
-
- StringTokenizer st = new StringTokenizer(inLine);
-
- while (st.hasMoreTokens())
- {
- String parsed = st.nextToken();
- if (parsed.substring(0, 1).compareTo("(") == 0)
- { // start of new node
- String tag = parsed.substring(1, parsed.length());
- prevTags.push(tag);
- level++;
- }
- else
- { // closing nodes
-
- String parentTag = (String) prevTags.get(prevTags.size() - 2)
- , currTag = (String) prevTags.get(prevTags.size() - 1);
- if (currTag.equals("NN-NK") && parentTag.equals("NP-SB"))
- currTag += "_" + parentTag;
-
- int firstBracket = parsed.indexOf(')');
- int noBracket = parsed.length() - firstBracket;
- String word = parsed.substring(0, firstBracket);
-
- if (currTag.equals("ART-SB")
- || currTag.equals("NN-NK_NP-SB")
- || currTag.equals("VAFIN-HD")
- || currTag.equals("VVFIN-HD")
- || currTag.equals("VMFIN-HD")
- || currTag.equals("PPER-SB")
- || currTag.equals("PRELS-SB")
- || currTag.equals("PDS-SB")
- || currTag.equals("PPER-PH")
- || currTag.equals("PPER-EP")
- )
- outFile.write(currTag + " ");
- else
- outFile.write("??? ");
-
- level -= noBracket;
-
- // pop the rest
- for (int i = 0 ; i < noBracket ; ++i)
- {
- prevTags.pop();
- }
- }
- }
- outFile.write('\n');
- }
-}
diff --git a/misc/misc.vcproj b/misc/misc.vcproj
deleted file mode 100644
index 107af47f8..000000000
--- a/misc/misc.vcproj
+++ /dev/null
@@ -1,174 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="8.00"
- Name="misc"
- ProjectGUID="{7FC3D2AC-0C9B-4F85-A679-34D0ABC0495E}"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|Win32"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
- UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
- >
- <File
- RelativePath=".\GenerateTuples.cpp"
- >
- </File>
- <File
- RelativePath=".\processPhraseTable.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl;inc;xsd"
- UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
- >
- <File
- RelativePath=".\GenerateTuples.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/misc/processPhraseTable.cpp b/misc/processPhraseTable.cpp
deleted file mode 100644
index 0dfc51960..000000000
--- a/misc/processPhraseTable.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-#include <iostream>
-//#include <fstream>
-#include <sstream>
-#include <vector>
-#include <string>
-#include <iterator>
-#include <functional>
-#include <sys/stat.h>
-#include "TypeDef.h"
-#include "PhraseDictionaryTree.h"
-#include "ConfusionNet.h"
-#include "FactorCollection.h"
-#include "Phrase.h"
-#include "InputFileStream.h"
-#include "Timer.h"
-
-Timer timer;
-
-template<typename T>
-std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
-{
- out<<x.size()<<" ";
- typename std::vector<T>::const_iterator iend=x.end();
- for(typename std::vector<T>::const_iterator i=x.begin();i!=iend;++i)
- out<<*i<<' ';
- return out;
-}
-#if 0
-FactorType getFactorType(int i) {
- switch(i) {
- case 0: return Surface;
- case 1: return POS;
- case 2: return Stem;
- case 3: return Morphology;
- }
- return Surface;
-}
-#endif
-
-FactorCollection factorCollection;
-
-inline bool existsFile(const char* filename) {
- struct stat mystat;
- return (stat(filename,&mystat)==0);
-}
-inline bool existsFile(const std::string& filename) {
- return existsFile(filename.c_str());
-}
-
-int main(int argc,char **argv) {
- std::string fto;size_t noScoreComponent=5;int cn=0;
- std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
- int verb=0;
- for(int i=1;i<argc;++i) {
- std::string s(argv[i]);
- if(s=="-ttable") {
- std::pair<char*,char*> p;
- p.first=argv[++i];
- p.second=argv[++i];
- ftts.push_back(std::make_pair(std::string(argv[++i]),p));
- }
- else if(s=="-nscores") noScoreComponent=atoi(argv[++i]);
- else if(s=="-out") fto=std::string(argv[++i]);
- else if(s=="-cn") cn=1;
- else if(s=="-irst") cn=2;
- else if(s=="-v") verb=atoi(argv[++i]);
- else if(s=="-h")
- {
- std::cerr<<"usage "<<argv[0]<<" :\n\n"
- "options:\n"
- "\t-ttable int int string -- translation table file, use '-' for stdin\n"
- "\t-out string -- output file name prefix for binary ttable\n"
- "\t-nscores int -- number of scores in ttable\n"
- "\nfunctions:\n"
- "\t - convert ascii ttable in binary format\n"
- "\t - if ttable is not read from stdin:\n"
- "\t treat each line as source phrase an print tgt candidates\n"
- "\n";
- return 1;
- }
- else
- {
- std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
- return 1;
- }
- }
-
- if(ftts.size()) {
- std::cerr<<"processing ptree for\n";
-
- if(ftts.size()==1 && ftts[0].first=="-") {
- PhraseDictionaryTree pdt(noScoreComponent);
- pdt.Create(std::cin,fto);}
- else
- {
-#if 0
- std::vector<PhraseDictionaryTree const*> pdicts;
- std::vector<FactorType> factorOrder;
- for(size_t i=0;i<ftts.size();++i) {
-
- PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
- &factorCollection,
- getFactorType(atoi(ftts[i].second.first)),
- getFactorType(atoi(ftts[i].second.second))
- );
- factorOrder.push_back(pdtptr->GetInputFactorType());
- PhraseDictionaryTree &pdt=*pdtptr;
- pdicts.push_back(pdtptr);
-
- std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
- std::string prefix=ftts[i].first+facStr;
- if(!existsFile(prefix+".binphr.idx")) {
- std::cerr<<"bin ttable does not exist -> create it\n";
- InputFileStream in(prefix);
- pdt.Create(in,prefix);
- }
- std::cerr<<"reading bin ttable\n";
- pdt.Read(prefix);
- }
-
- std::cerr<<"processing stdin\n";
- if(!cn) {
- std::string line;
- while(getline(std::cin,line)) {
- std::istringstream is(line);
-#if 0
- std::vector<std::string> f;
- std::copy(std::istream_iterator<std::string>(is),
- std::istream_iterator<std::string>(),
- std::back_inserter(f));
-#endif
- std::cerr<<"got source phrase '"<<line<<"'\n";
-
- Phrase F(Input);
- F.CreateFromString(factorOrder,line,factorCollection);
-
- for(size_t k=0;k<pdicts.size();++k) {
- PhraseDictionaryTree const& pdt=*pdicts[k];
-
- std::vector<std::string> f(F.GetSize());
- for(size_t i=0;i<F.GetSize();++i)
- f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();
-
- std::stringstream iostA,iostB;
- std::cerr<<"full phrase processing "<<f<<"\n";
- pdt.PrintTargetCandidates(f,iostA);
-
- std::cerr<<"processing with prefix ptr\n";
- PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());
-
- for(size_t i=0;i<f.size() && p;++i) {
- std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
- p=pdt.Extend(p,f[i]);
- std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
- }
- if(p) {
- std::cerr<<"retrieving candidates from prefix ptr\n";
- pdt.PrintTargetCandidates(p,iostB);}
- else {
- std::cerr<<"final ptr is invalid\n";
- iostB<<"there are 0 target candidates\n";
- }
- if(iostA.str() != iostB.str())
- std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";
-
- std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
- pdt.FreeMemory();
-
- }
-
- }
- }
- else {
- // process confusion net input
- ConfusionNet net(&factorCollection);
- std::vector<std::vector<float> > weights;
- for(size_t i=0;i<pdicts.size();++i)
- weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));
-
- while(net.ReadF(std::cin,factorOrder,cn-1)) {
- net.Print(std::cerr);
- GenerateCandidates(net,pdicts,weights,verb);
- }
-
- }
-#else
- std::cerr<<"ERROR: these functions are currently broken...\n";
- exit(1);
-#endif
- }
- }
-
-}
diff --git a/moses-cmd/.cdtbuild b/moses-cmd/.cdtbuild
deleted file mode 100644
index 50d252597..000000000
--- a/moses-cmd/.cdtbuild
+++ /dev/null
@@ -1,140 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?fileVersion 3.0.0?>
-
-<ManagedProjectBuildInfo>
-<project id="moses-cmd.cdt.managedbuild.target.gnu.exe.880461730" name="Executable (Gnu)" projectType="cdt.managedbuild.target.gnu.exe">
-<configuration artifactName="moses-cmd" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.debug.862821065" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
-<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.442539703" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.626133972" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.381079048" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
-<option id="gnu.cpp.compiler.option.include.paths.1241301640" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../moses/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../irstlm/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/include/boost-1_33_1"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/misc/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/dstruct/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/include"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/lm/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysql/include"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib"/>
-</option>
-<option id="gnu.cpp.compiler.option.preprocessor.def.1028025969" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
-<listOptionValue builtIn="false" value="LM_SRI"/>
-<listOptionValue builtIn="false" value="LM_IRST"/>
-<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
-</option>
-<option id="gnu.cpp.compiler.option.debugging.gprof.514527174" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1905266391" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.888044188" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
-<option id="gnu.cpp.link.option.libs.427302130" superClass="gnu.cpp.link.option.libs" valueType="libs">
-<listOptionValue builtIn="false" value="pthread"/>
-<listOptionValue builtIn="false" value="mysqlclient"/>
-<listOptionValue builtIn="false" value="z"/>
-<listOptionValue builtIn="false" value="lattice"/>
-<listOptionValue builtIn="false" value="misc"/>
-<listOptionValue builtIn="false" value="dstruct"/>
-<listOptionValue builtIn="false" value="oolm"/>
-</option>
-<option id="gnu.cpp.link.option.paths.1718276622" superClass="gnu.cpp.link.option.paths" valueType="stringList">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysql/lib"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/lib/i686"/>
-</option>
-<option id="gnu.cpp.link.option.userobjs.551693347" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../moses/${ConfigName}/libmoses.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../irstlm/${ConfigName}/libirstlm.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_filesystem-gcc.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_iostreams-gcc.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_thread-gcc-mt.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/manip.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/myset.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/qparms.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/sql_string.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/string_util.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/type_info.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/vallist.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/coldata.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/datetime.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/field_names.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/field_types.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/result.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/query.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/connection.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/row.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/fields.o"/>
-</option>
-<option id="gnu.cpp.link.option.flags.1995182231" superClass="gnu.cpp.link.option.flags" value="" valueType="string"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1540394737" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug"/>
-<macros/>
-</toolChain>
-</configuration>
-<configuration artifactName="moses-cmd" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.exe.release.1126249995" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
-<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1691856622" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1171408650" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1960479367" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
-<option id="gnu.cpp.compiler.option.include.paths.1986529351" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../moses/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../irstlm/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/include/boost-1_33_1"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/misc/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/dstruct/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/include"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/lm/src"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysql/include"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib"/>
-</option>
-<option id="gnu.cpp.compiler.option.preprocessor.def.542441515" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
-<listOptionValue builtIn="false" value="LM_SRI"/>
-<listOptionValue builtIn="false" value="LM_IRST"/>
-<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
-</option>
-<option id="gnu.cpp.compiler.option.debugging.gprof.24360799" superClass="gnu.cpp.compiler.option.debugging.gprof" value="false" valueType="boolean"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.285054015" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1153317521" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
-<option id="gnu.cpp.link.option.libs.1100640875" superClass="gnu.cpp.link.option.libs" valueType="libs">
-<listOptionValue builtIn="false" value="pthread"/>
-<listOptionValue builtIn="false" value="mysqlclient"/>
-<listOptionValue builtIn="false" value="z"/>
-<listOptionValue builtIn="false" value="lattice"/>
-<listOptionValue builtIn="false" value="misc"/>
-<listOptionValue builtIn="false" value="dstruct"/>
-<listOptionValue builtIn="false" value="oolm"/>
-</option>
-<option id="gnu.cpp.link.option.paths.724264639" superClass="gnu.cpp.link.option.paths" valueType="stringList">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysql/lib"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../srilm/lib/i686"/>
-</option>
-<option id="gnu.cpp.link.option.userobjs.1365891611" superClass="gnu.cpp.link.option.userobjs" valueType="userObjs">
-<listOptionValue builtIn="false" value="${ProjDirPath}/../moses/${ConfigName}/libmoses.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../irstlm/${ConfigName}/libirstlm.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_filesystem-gcc.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_iostreams-gcc.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../boost/lib/libboost_thread-gcc-mt.a"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/manip.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/myset.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/qparms.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/sql_string.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/string_util.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/type_info.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/vallist.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/coldata.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/datetime.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/field_names.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/field_types.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/result.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/query.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/connection.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/row.o"/>
-<listOptionValue builtIn="false" value="${ProjDirPath}/../mysqlpp/lib/fields.o"/>
-</option>
-<option id="gnu.cpp.link.option.flags.213483667" superClass="gnu.cpp.link.option.flags" value="" valueType="string"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1402140367" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release"/>
-<macros/>
-</toolChain>
-</configuration>
-<macros/>
-</project>
-</ManagedProjectBuildInfo>
diff --git a/moses-cmd/.cdtproject b/moses-cmd/.cdtproject
deleted file mode 100644
index 9d4253d9e..000000000
--- a/moses-cmd/.cdtproject
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?eclipse-cdt version="2.0"?>
-
-<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
-<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
-<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-<extension id="org.eclipse.cdt.core.nullindexer" point="org.eclipse.cdt.core.CIndexer"/>
-<data>
-<item id="org.eclipse.cdt.core.pathentry">
-<pathentry kind="src" path=""/>
-<pathentry kind="out" path=""/>
-<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/>
-</item>
-</data>
-</cdtproject>
diff --git a/moses-cmd/.cvsignore b/moses-cmd/.cvsignore
deleted file mode 100644
index fec7e99aa..000000000
--- a/moses-cmd/.cvsignore
+++ /dev/null
@@ -1,11 +0,0 @@
-Makefile
-stamp-h1
-config.status
-.cvsignore
-config.h
-autom4te.cache
-.cdtproject
-gmon.out
-Debug*
-Release*
-out.dat
diff --git a/moses-cmd/.project b/moses-cmd/.project
deleted file mode 100644
index d53cf546e..000000000
--- a/moses-cmd/.project
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>moses-cmd</name>
- <comment></comment>
- <projects>
- <project>irstlm</project>
- <project>moses</project>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- </natures>
-</projectDescription>
diff --git a/moses-cmd/.settings/org.eclipse.cdt.managedbuilder.core.prefs b/moses-cmd/.settings/org.eclipse.cdt.managedbuilder.core.prefs
deleted file mode 100644
index d34c24864..000000000
--- a/moses-cmd/.settings/org.eclipse.cdt.managedbuilder.core.prefs
+++ /dev/null
@@ -1,16 +0,0 @@
-#Tue Aug 08 14:27:23 EDT 2006
-=\=\=\=\=\=\=
-<<<<<<<=org.eclipse.cdt.managedbuilder.core.prefs
->>>>>>>=1.2
-eclipse.preferences.version=1
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.debug.807464492=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.debug.862821065=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.release.1126249995=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.exe.release.797815336=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.debug.807464492=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.debug.862821065=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.release.1126249995=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.exe.release.797815336=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="LIBRARY_PATH" operation\="remove"/>\n</environment>\n
-environment/project=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.exe.debug.862821065=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.exe.release.1126249995=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
diff --git a/moses-cmd/Makefile.am b/moses-cmd/Makefile.am
deleted file mode 100644
index 9b16c41cc..000000000
--- a/moses-cmd/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-# not a GNU package. You can remove this line, if
-# have all needed files, that a GNU package needs
-#AUTOMAKE_OPTIONS = foreign 1.4
-SUBDIRS = src
-
diff --git a/moses-cmd/Makefile.in b/moses-cmd/Makefile.in
deleted file mode 100644
index e6530e546..000000000
--- a/moses-cmd/Makefile.in
+++ /dev/null
@@ -1,574 +0,0 @@
-# Makefile.in generated by automake 1.9.2 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-top_builddir = .
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-INSTALL = @INSTALL@
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-subdir = .
-DIST_COMMON = $(am__configure_deps) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in $(srcdir)/config.h.in \
- $(top_srcdir)/configure depcomp install-sh missing
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
- configure.lineno configure.status.lineno
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-exec-recursive install-info-recursive \
- install-recursive installcheck-recursive installdirs-recursive \
- pdf-recursive ps-recursive uninstall-info-recursive \
- uninstall-recursive
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-distdir = $(PACKAGE)-$(VERSION)
-top_distdir = $(distdir)
-am__remove_distdir = \
- { test ! -d $(distdir) \
- || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
- && rm -fr $(distdir); }; }
-DIST_ARCHIVES = $(distdir).tar.gz
-GZIP_ENV = --best
-distuninstallcheck_listfiles = find . -type f -print
-distcleancheck_listfiles = find . -type f -print
-ACLOCAL = @ACLOCAL@
-AMDEP_FALSE = @AMDEP_FALSE@
-AMDEP_TRUE = @AMDEP_TRUE@
-AMTAR = @AMTAR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BUILD_MYSQL_SUPPORT_FALSE = @BUILD_MYSQL_SUPPORT_FALSE@
-BUILD_MYSQL_SUPPORT_TRUE = @BUILD_MYSQL_SUPPORT_TRUE@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-INTERNAL_LM_FALSE = @INTERNAL_LM_FALSE@
-INTERNAL_LM_TRUE = @INTERNAL_LM_TRUE@
-IRST_LM_FALSE = @IRST_LM_FALSE@
-IRST_LM_TRUE = @IRST_LM_TRUE@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LTLIBOBJS = @LTLIBOBJS@
-MAKEINFO = @MAKEINFO@
-MYSQLCLIENT_CPPFLAGS = @MYSQLCLIENT_CPPFLAGS@
-MYSQLCLIENT_LDFLAGS = @MYSQLCLIENT_LDFLAGS@
-MYSQLCLIENT_LIBS = @MYSQLCLIENT_LIBS@
-OBJEXT = @OBJEXT@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SRI_LM_FALSE = @SRI_LM_FALSE@
-SRI_LM_TRUE = @SRI_LM_TRUE@
-STRIP = @STRIP@
-VERSION = @VERSION@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_RANLIB = @ac_ct_RANLIB@
-ac_ct_STRIP = @ac_ct_STRIP@
-am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
-am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build_alias = @build_alias@
-datadir = @datadir@
-exec_prefix = @exec_prefix@
-host_alias = @host_alias@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-
-# not a GNU package. You can remove this line, if
-# have all needed files, that a GNU package needs
-#AUTOMAKE_OPTIONS = foreign 1.4
-SUBDIRS = src
-all: config.h
- $(MAKE) $(AM_MAKEFLAGS) all-recursive
-
-.SUFFIXES:
-am--refresh:
- @:
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \
- cd $(srcdir) && $(AUTOMAKE) --foreign \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --foreign Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- echo ' $(SHELL) ./config.status'; \
- $(SHELL) ./config.status;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- $(SHELL) ./config.status --recheck
-
-$(top_srcdir)/configure: $(am__configure_deps)
- cd $(srcdir) && $(AUTOCONF)
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
- cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
-
-config.h: stamp-h1
- @if test ! -f $@; then \
- rm -f stamp-h1; \
- $(MAKE) stamp-h1; \
- else :; fi
-
-stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
- @rm -f stamp-h1
- cd $(top_builddir) && $(SHELL) ./config.status config.h
-$(srcdir)/config.h.in: $(am__configure_deps)
- cd $(top_srcdir) && $(AUTOHEADER)
- rm -f stamp-h1
- touch $@
-
-distclean-hdr:
- -rm -f config.h stamp-h1
-uninstall-info-am:
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @set fnord $$MAKEFLAGS; amf=$$2; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-mostlyclean-recursive clean-recursive distclean-recursive \
-maintainer-clean-recursive:
- @set fnord $$MAKEFLAGS; amf=$$2; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- $(am__remove_distdir)
- mkdir $(distdir)
- @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
- list='$(DISTFILES)'; for file in $$list; do \
- case $$file in \
- $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
- $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
- esac; \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test "$$dir" != "$$file" && test "$$dir" != "."; then \
- dir="/$$dir"; \
- $(mkdir_p) "$(distdir)$$dir"; \
- else \
- dir=''; \
- fi; \
- if test -d $$d/$$file; then \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
- list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(mkdir_p) "$(distdir)/$$subdir" \
- || exit 1; \
- distdir=`$(am__cd) $(distdir) && pwd`; \
- top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
- (cd $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$top_distdir" \
- distdir="$$distdir/$$subdir" \
- distdir) \
- || exit 1; \
- fi; \
- done
- -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
- ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
- || chmod -R a+r $(distdir)
-dist-gzip: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-dist-bzip2: distdir
- tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
- $(am__remove_distdir)
-
-dist-tarZ: distdir
- tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
- $(am__remove_distdir)
-
-dist-shar: distdir
- shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
- $(am__remove_distdir)
-
-dist-zip: distdir
- -rm -f $(distdir).zip
- zip -rq $(distdir).zip $(distdir)
- $(am__remove_distdir)
-
-dist dist-all: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-# This target untars the dist file and tries a VPATH configuration. Then
-# it guarantees that the distribution is self-contained by making another
-# tarfile.
-distcheck: dist
- case '$(DIST_ARCHIVES)' in \
- *.tar.gz*) \
- GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
- *.tar.bz2*) \
- bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
- *.tar.Z*) \
- uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
- *.shar.gz*) \
- GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
- *.zip*) \
- unzip $(distdir).zip ;;\
- esac
- chmod -R a-w $(distdir); chmod a+w $(distdir)
- mkdir $(distdir)/_build
- mkdir $(distdir)/_inst
- chmod a-w $(distdir)
- dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
- && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
- && cd $(distdir)/_build \
- && ../configure --srcdir=.. --prefix="$$dc_install_base" \
- $(DISTCHECK_CONFIGURE_FLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) dvi \
- && $(MAKE) $(AM_MAKEFLAGS) check \
- && $(MAKE) $(AM_MAKEFLAGS) install \
- && $(MAKE) $(AM_MAKEFLAGS) installcheck \
- && $(MAKE) $(AM_MAKEFLAGS) uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
- distuninstallcheck \
- && chmod -R a-w "$$dc_install_base" \
- && ({ \
- (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
- distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
- } || { rm -rf "$$dc_destdir"; exit 1; }) \
- && rm -rf "$$dc_destdir" \
- && $(MAKE) $(AM_MAKEFLAGS) dist \
- && rm -rf $(DIST_ARCHIVES) \
- && $(MAKE) $(AM_MAKEFLAGS) distcleancheck
- $(am__remove_distdir)
- @(echo "$(distdir) archives ready for distribution: "; \
- list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
- sed -e '1{h;s/./=/g;p;x;}' -e '$${p;x;}'
-distuninstallcheck:
- @cd $(distuninstallcheck_dir) \
- && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
- || { echo "ERROR: files left after uninstall:" ; \
- if test -n "$(DESTDIR)"; then \
- echo " (check DESTDIR support)"; \
- fi ; \
- $(distuninstallcheck_listfiles) ; \
- exit 1; } >&2
-distcleancheck: distclean
- @if test '$(srcdir)' = . ; then \
- echo "ERROR: distcleancheck can only run from a VPATH build" ; \
- exit 1 ; \
- fi
- @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
- || { echo "ERROR: files left in build directory after distclean:" ; \
- $(distcleancheck_listfiles) ; \
- exit 1; } >&2
-check-am: all-am
-check: check-recursive
-all-am: Makefile config.h
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-exec-am:
-
-install-info: install-info-recursive
-
-install-man:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -rf $(top_srcdir)/autom4te.cache
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-info-am
-
-uninstall-info: uninstall-info-recursive
-
-.PHONY: $(RECURSIVE_TARGETS) CTAGS GTAGS all all-am am--refresh check \
- check-am clean clean-generic clean-recursive ctags \
- ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-shar \
- dist-tarZ dist-zip distcheck distclean distclean-generic \
- distclean-hdr distclean-recursive distclean-tags \
- distcleancheck distdir distuninstallcheck dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-exec install-exec-am install-info \
- install-info-am install-man install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic maintainer-clean-recursive \
- mostlyclean mostlyclean-generic mostlyclean-recursive pdf \
- pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
- uninstall-info-am
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/moses-cmd/aclocal.m4 b/moses-cmd/aclocal.m4
deleted file mode 100644
index ef60f9840..000000000
--- a/moses-cmd/aclocal.m4
+++ /dev/null
@@ -1,1044 +0,0 @@
-# generated automatically by aclocal 1.9.2 -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-# -*- Autoconf -*-
-# Copyright (C) 2002, 2003 Free Software Foundation, Inc.
-# Generated from amversion.in; do not edit by hand.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-
-# AM_AUTOMAKE_VERSION(VERSION)
-# ----------------------------
-# Automake X.Y traces this macro to ensure aclocal.m4 has been
-# generated from the m4 files accompanying Automake X.Y.
-AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.9"])
-
-# AM_SET_CURRENT_AUTOMAKE_VERSION
-# -------------------------------
-# Call AM_AUTOMAKE_VERSION so it can be traced.
-# This function is AC_REQUIREd by AC_INIT_AUTOMAKE.
-AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
- [AM_AUTOMAKE_VERSION([1.9.2])])
-
-# AM_AUX_DIR_EXPAND
-
-# Copyright (C) 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
-# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
-# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
-#
-# Of course, Automake must honor this variable whenever it calls a
-# tool from the auxiliary directory. The problem is that $srcdir (and
-# therefore $ac_aux_dir as well) can be either absolute or relative,
-# depending on how configure is run. This is pretty annoying, since
-# it makes $ac_aux_dir quite unusable in subdirectories: in the top
-# source directory, any form will work fine, but in subdirectories a
-# relative path needs to be adjusted first.
-#
-# $ac_aux_dir/missing
-# fails when called from a subdirectory if $ac_aux_dir is relative
-# $top_srcdir/$ac_aux_dir/missing
-# fails if $ac_aux_dir is absolute,
-# fails when called from a subdirectory in a VPATH build with
-# a relative $ac_aux_dir
-#
-# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
-# are both prefixed by $srcdir. In an in-source build this is usually
-# harmless because $srcdir is `.', but things will broke when you
-# start a VPATH build or use an absolute $srcdir.
-#
-# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
-# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
-# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
-# and then we would define $MISSING as
-# MISSING="\${SHELL} $am_aux_dir/missing"
-# This will work as long as MISSING is not called from configure, because
-# unfortunately $(top_srcdir) has no meaning in configure.
-# However there are other variables, like CC, which are often used in
-# configure, and could therefore not use this "fixed" $ac_aux_dir.
-#
-# Another solution, used here, is to always expand $ac_aux_dir to an
-# absolute PATH. The drawback is that using absolute paths prevent a
-# configured tree to be moved without reconfiguration.
-
-AC_DEFUN([AM_AUX_DIR_EXPAND],
-[dnl Rely on autoconf to set up CDPATH properly.
-AC_PREREQ([2.50])dnl
-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
-])
-
-# AM_CONDITIONAL -*- Autoconf -*-
-
-# Copyright (C) 1997, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 6
-
-# AM_CONDITIONAL(NAME, SHELL-CONDITION)
-# -------------------------------------
-# Define a conditional.
-AC_DEFUN([AM_CONDITIONAL],
-[AC_PREREQ(2.52)dnl
- ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
- [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
-AC_SUBST([$1_TRUE])
-AC_SUBST([$1_FALSE])
-if $2; then
- $1_TRUE=
- $1_FALSE='#'
-else
- $1_TRUE='#'
- $1_FALSE=
-fi
-AC_CONFIG_COMMANDS_PRE(
-[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
- AC_MSG_ERROR([[conditional "$1" was never defined.
-Usually this means the macro was only invoked conditionally.]])
-fi])])
-
-# serial 7 -*- Autoconf -*-
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-
-# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
-# written in clear, in which case automake, when reading aclocal.m4,
-# will think it sees a *use*, and therefore will trigger all it's
-# C support machinery. Also note that it means that autoscan, seeing
-# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
-
-
-
-# _AM_DEPENDENCIES(NAME)
-# ----------------------
-# See how the compiler implements dependency checking.
-# NAME is "CC", "CXX", "GCJ", or "OBJC".
-# We try a few techniques and use that to set a single cache variable.
-#
-# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
-# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
-# dependency, and given that the user is not expected to run this macro,
-# just rely on AC_PROG_CC.
-AC_DEFUN([_AM_DEPENDENCIES],
-[AC_REQUIRE([AM_SET_DEPDIR])dnl
-AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
-AC_REQUIRE([AM_MAKE_INCLUDE])dnl
-AC_REQUIRE([AM_DEP_TRACK])dnl
-
-ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
- [$1], CXX, [depcc="$CXX" am_compiler_list=],
- [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
- [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
- [depcc="$$1" am_compiler_list=])
-
-AC_CACHE_CHECK([dependency style of $depcc],
- [am_cv_$1_dependencies_compiler_type],
-[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
- # We make a subdir and do the tests there. Otherwise we can end up
- # making bogus files that we don't know about and never remove. For
- # instance it was reported that on HP-UX the gcc test will end up
- # making a dummy file named `D' -- because `-MD' means `put the output
- # in D'.
- mkdir conftest.dir
- # Copy depcomp to subdir because otherwise we won't find it if we're
- # using a relative directory.
- cp "$am_depcomp" conftest.dir
- cd conftest.dir
- # We will build objects and dependencies in a subdirectory because
- # it helps to detect inapplicable dependency modes. For instance
- # both Tru64's cc and ICC support -MD to output dependencies as a
- # side effect of compilation, but ICC will put the dependencies in
- # the current directory while Tru64 will put them in the object
- # directory.
- mkdir sub
-
- am_cv_$1_dependencies_compiler_type=none
- if test "$am_compiler_list" = ""; then
- am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
- fi
- for depmode in $am_compiler_list; do
- # Setup a source with many dependencies, because some compilers
- # like to wrap large dependency lists on column 80 (with \), and
- # we should not choose a depcomp mode which is confused by this.
- #
- # We need to recreate these files for each test, as the compiler may
- # overwrite some of them when testing with obscure command lines.
- # This happens at least with the AIX C compiler.
- : > sub/conftest.c
- for i in 1 2 3 4 5 6; do
- echo '#include "conftst'$i'.h"' >> sub/conftest.c
- # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
- # Solaris 8's {/usr,}/bin/sh.
- touch sub/conftst$i.h
- done
- echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
-
- case $depmode in
- nosideeffect)
- # after this tag, mechanisms are not by side-effect, so they'll
- # only be used when explicitly requested
- if test "x$enable_dependency_tracking" = xyes; then
- continue
- else
- break
- fi
- ;;
- none) break ;;
- esac
- # We check with `-c' and `-o' for the sake of the "dashmstdout"
- # mode. It turns out that the SunPro C++ compiler does not properly
- # handle `-M -o', and we need to detect this.
- if depmode=$depmode \
- source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
- depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
- $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
- >/dev/null 2>conftest.err &&
- grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
- grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
- ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
- # icc doesn't choke on unknown options, it will just issue warnings
- # or remarks (even with -Werror). So we grep stderr for any message
- # that says an option was ignored or not supported.
- # When given -MP, icc 7.0 and 7.1 complain thusly:
- # icc: Command line warning: ignoring option '-M'; no argument required
- # The diagnosis changed in icc 8.0:
- # icc: Command line remark: option '-MP' not supported
- if (grep 'ignoring option' conftest.err ||
- grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
- am_cv_$1_dependencies_compiler_type=$depmode
- break
- fi
- fi
- done
-
- cd ..
- rm -rf conftest.dir
-else
- am_cv_$1_dependencies_compiler_type=none
-fi
-])
-AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
-AM_CONDITIONAL([am__fastdep$1], [
- test "x$enable_dependency_tracking" != xno \
- && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
-])
-
-
-# AM_SET_DEPDIR
-# -------------
-# Choose a directory name for dependency files.
-# This macro is AC_REQUIREd in _AM_DEPENDENCIES
-AC_DEFUN([AM_SET_DEPDIR],
-[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
-])
-
-
-# AM_DEP_TRACK
-# ------------
-AC_DEFUN([AM_DEP_TRACK],
-[AC_ARG_ENABLE(dependency-tracking,
-[ --disable-dependency-tracking speeds up one-time build
- --enable-dependency-tracking do not reject slow dependency extractors])
-if test "x$enable_dependency_tracking" != xno; then
- am_depcomp="$ac_aux_dir/depcomp"
- AMDEPBACKSLASH='\'
-fi
-AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
-AC_SUBST([AMDEPBACKSLASH])
-])
-
-# Generate code to set up dependency tracking. -*- Autoconf -*-
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-#serial 2
-
-# _AM_OUTPUT_DEPENDENCY_COMMANDS
-# ------------------------------
-AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
-[for mf in $CONFIG_FILES; do
- # Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named `Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # So let's grep whole file.
- if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then
- dirpart=`AS_DIRNAME("$mf")`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running `make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # When using ansi2knr, U may be empty or an underscore; expand it
- U=`sed -n 's/^U = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`AS_DIRNAME(["$file"])`
- AS_MKDIR_P([$dirpart/$fdir])
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
-done
-])# _AM_OUTPUT_DEPENDENCY_COMMANDS
-
-
-# AM_OUTPUT_DEPENDENCY_COMMANDS
-# -----------------------------
-# This macro should only be invoked once -- use via AC_REQUIRE.
-#
-# This code is only required when automatic dependency tracking
-# is enabled. FIXME. This creates each `.P' file that we will
-# need in order to bootstrap the dependency handling code.
-AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
-[AC_CONFIG_COMMANDS([depfiles],
- [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
- [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
-])
-
-# Like AC_CONFIG_HEADER, but automatically create stamp file. -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 7
-
-# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
-# Do all the work for Automake. -*- Autoconf -*-
-
-# This macro actually does too much some checks are only needed if
-# your package does certain things. But this isn't really a big deal.
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 11
-
-# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
-# AM_INIT_AUTOMAKE([OPTIONS])
-# -----------------------------------------------
-# The call with PACKAGE and VERSION arguments is the old style
-# call (pre autoconf-2.50), which is being phased out. PACKAGE
-# and VERSION should now be passed to AC_INIT and removed from
-# the call to AM_INIT_AUTOMAKE.
-# We support both call styles for the transition. After
-# the next Automake release, Autoconf can make the AC_INIT
-# arguments mandatory, and then we can depend on a new Autoconf
-# release and drop the old call support.
-AC_DEFUN([AM_INIT_AUTOMAKE],
-[AC_PREREQ([2.58])dnl
-dnl Autoconf wants to disallow AM_ names. We explicitly allow
-dnl the ones we care about.
-m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
-AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
-AC_REQUIRE([AC_PROG_INSTALL])dnl
-# test to see if srcdir already configured
-if test "`cd $srcdir && pwd`" != "`pwd`" &&
- test -f $srcdir/config.status; then
- AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
-fi
-
-# test whether we have cygpath
-if test -z "$CYGPATH_W"; then
- if (cygpath --version) >/dev/null 2>/dev/null; then
- CYGPATH_W='cygpath -w'
- else
- CYGPATH_W=echo
- fi
-fi
-AC_SUBST([CYGPATH_W])
-
-# Define the identity of the package.
-dnl Distinguish between old-style and new-style calls.
-m4_ifval([$2],
-[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
- AC_SUBST([PACKAGE], [$1])dnl
- AC_SUBST([VERSION], [$2])],
-[_AM_SET_OPTIONS([$1])dnl
- AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
- AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
-
-_AM_IF_OPTION([no-define],,
-[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
- AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
-
-# Some tools Automake needs.
-AC_REQUIRE([AM_SANITY_CHECK])dnl
-AC_REQUIRE([AC_ARG_PROGRAM])dnl
-AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
-AM_MISSING_PROG(AUTOCONF, autoconf)
-AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
-AM_MISSING_PROG(AUTOHEADER, autoheader)
-AM_MISSING_PROG(MAKEINFO, makeinfo)
-AM_PROG_INSTALL_SH
-AM_PROG_INSTALL_STRIP
-AC_REQUIRE([AM_PROG_MKDIR_P])dnl
-# We need awk for the "check" target. The system "awk" is bad on
-# some platforms.
-AC_REQUIRE([AC_PROG_AWK])dnl
-AC_REQUIRE([AC_PROG_MAKE_SET])dnl
-AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
- [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
- [_AM_PROG_TAR([v7])])])
-_AM_IF_OPTION([no-dependencies],,
-[AC_PROVIDE_IFELSE([AC_PROG_CC],
- [_AM_DEPENDENCIES(CC)],
- [define([AC_PROG_CC],
- defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
-AC_PROVIDE_IFELSE([AC_PROG_CXX],
- [_AM_DEPENDENCIES(CXX)],
- [define([AC_PROG_CXX],
- defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
-])
-])
-
-
-# When config.status generates a header, we must update the stamp-h file.
-# This file resides in the same directory as the config header
-# that is generated. The stamp files are numbered to have different names.
-
-# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
-# loop where config.status creates the headers, so we can generate
-# our stamp files there.
-AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
-[# Compute $1's index in $config_headers.
-_am_stamp_count=1
-for _am_header in $config_headers :; do
- case $_am_header in
- $1 | $1:* )
- break ;;
- * )
- _am_stamp_count=`expr $_am_stamp_count + 1` ;;
- esac
-done
-echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count])
-
-# AM_PROG_INSTALL_SH
-# ------------------
-# Define $install_sh.
-
-# Copyright (C) 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-AC_DEFUN([AM_PROG_INSTALL_SH],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-install_sh=${install_sh-"$am_aux_dir/install-sh"}
-AC_SUBST(install_sh)])
-
-# -*- Autoconf -*-
-# Copyright (C) 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 1
-
-# Check whether the underlying file-system supports filenames
-# with a leading dot. For instance MS-DOS doesn't.
-AC_DEFUN([AM_SET_LEADING_DOT],
-[rm -rf .tst 2>/dev/null
-mkdir .tst 2>/dev/null
-if test -d .tst; then
- am__leading_dot=.
-else
- am__leading_dot=_
-fi
-rmdir .tst 2>/dev/null
-AC_SUBST([am__leading_dot])])
-
-# Check to see how 'make' treats includes. -*- Autoconf -*-
-
-# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 2
-
-# AM_MAKE_INCLUDE()
-# -----------------
-# Check to see how make treats includes.
-AC_DEFUN([AM_MAKE_INCLUDE],
-[am_make=${MAKE-make}
-cat > confinc << 'END'
-am__doit:
- @echo done
-.PHONY: am__doit
-END
-# If we don't find an include directive, just comment out the code.
-AC_MSG_CHECKING([for style of include used by $am_make])
-am__include="#"
-am__quote=
-_am_result=none
-# First try GNU make style include.
-echo "include confinc" > confmf
-# We grep out `Entering directory' and `Leaving directory'
-# messages which can occur if `w' ends up in MAKEFLAGS.
-# In particular we don't look at `^make:' because GNU make might
-# be invoked under some other name (usually "gmake"), in which
-# case it prints its new name instead of `make'.
-if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
- am__include=include
- am__quote=
- _am_result=GNU
-fi
-# Now try BSD make style include.
-if test "$am__include" = "#"; then
- echo '.include "confinc"' > confmf
- if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
- am__include=.include
- am__quote="\""
- _am_result=BSD
- fi
-fi
-AC_SUBST([am__include])
-AC_SUBST([am__quote])
-AC_MSG_RESULT([$_am_result])
-rm -f confinc confmf
-])
-
-# -*- Autoconf -*-
-
-
-# Copyright (C) 1997, 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 3
-
-# AM_MISSING_PROG(NAME, PROGRAM)
-# ------------------------------
-AC_DEFUN([AM_MISSING_PROG],
-[AC_REQUIRE([AM_MISSING_HAS_RUN])
-$1=${$1-"${am_missing_run}$2"}
-AC_SUBST($1)])
-
-
-# AM_MISSING_HAS_RUN
-# ------------------
-# Define MISSING if not defined so far and test if it supports --run.
-# If it does, set am_missing_run to use it, otherwise, to nothing.
-AC_DEFUN([AM_MISSING_HAS_RUN],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
-# Use eval to expand $SHELL
-if eval "$MISSING --run true"; then
- am_missing_run="$MISSING --run "
-else
- am_missing_run=
- AC_MSG_WARN([`missing' script is too old or missing])
-fi
-])
-
-# AM_PROG_MKDIR_P
-# ---------------
-# Check whether `mkdir -p' is supported, fallback to mkinstalldirs otherwise.
-
-# Copyright (C) 2003, 2004 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
-# created by `make install' are always world readable, even if the
-# installer happens to have an overly restrictive umask (e.g. 077).
-# This was a mistake. There are at least two reasons why we must not
-# use `-m 0755':
-# - it causes special bits like SGID to be ignored,
-# - it may be too restrictive (some setups expect 775 directories).
-#
-# Do not use -m 0755 and let people choose whatever they expect by
-# setting umask.
-#
-# We cannot accept any implementation of `mkdir' that recognizes `-p'.
-# Some implementations (such as Solaris 8's) are not thread-safe: if a
-# parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
-# concurrently, both version can detect that a/ is missing, but only
-# one can create it and the other will error out. Consequently we
-# restrict ourselves to GNU make (using the --version option ensures
-# this.)
-AC_DEFUN([AM_PROG_MKDIR_P],
-[if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
- # We used to keeping the `.' as first argument, in order to
- # allow $(mkdir_p) to be used without argument. As in
- # $(mkdir_p) $(somedir)
- # where $(somedir) is conditionally defined. However this is wrong
- # for two reasons:
- # 1. if the package is installed by a user who cannot write `.'
- # make install will fail,
- # 2. the above comment should most certainly read
- # $(mkdir_p) $(DESTDIR)$(somedir)
- # so it does not work when $(somedir) is undefined and
- # $(DESTDIR) is not.
- # To support the latter case, we have to write
- # test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir),
- # so the `.' trick is pointless.
- mkdir_p='mkdir -p --'
-else
- # On NextStep and OpenStep, the `mkdir' command does not
- # recognize any option. It will interpret all options as
- # directories to create, and then abort because `.' already
- # exists.
- for d in ./-p ./--version;
- do
- test -d $d && rmdir $d
- done
- # $(mkinstalldirs) is defined by Automake if mkinstalldirs exists.
- if test -f "$ac_aux_dir/mkinstalldirs"; then
- mkdir_p='$(mkinstalldirs)'
- else
- mkdir_p='$(install_sh) -d'
- fi
-fi
-AC_SUBST([mkdir_p])])
-
-# Helper functions for option handling. -*- Autoconf -*-
-
-# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 2
-
-# _AM_MANGLE_OPTION(NAME)
-# -----------------------
-AC_DEFUN([_AM_MANGLE_OPTION],
-[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
-
-# _AM_SET_OPTION(NAME)
-# ------------------------------
-# Set option NAME. Presently that only means defining a flag for this option.
-AC_DEFUN([_AM_SET_OPTION],
-[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
-
-# _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
-# OPTIONS is a space-separated list of Automake options.
-AC_DEFUN([_AM_SET_OPTIONS],
-[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
-
-# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
-# -------------------------------------------
-# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
-AC_DEFUN([_AM_IF_OPTION],
-[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-
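For orientation, a hedged sketch of how these option helpers are driven: AM_INIT_AUTOMAKE forwards its option list to _AM_SET_OPTIONS, and later macros branch with _AM_IF_OPTION. The option names and messages below are illustrative only:

    _AM_SET_OPTIONS([foreign no-dependencies])
    _AM_IF_OPTION([no-dependencies],
                  [AC_MSG_NOTICE([dependency tracking disabled])],
                  [AC_MSG_NOTICE([dependency tracking enabled])])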
-#
-# Check to make sure that the build environment is sane.
-#
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 3
-
-# AM_SANITY_CHECK
-# ---------------
-AC_DEFUN([AM_SANITY_CHECK],
-[AC_MSG_CHECKING([whether build environment is sane])
-# Just in case
-sleep 1
-echo timestamp > conftest.file
-# Do `set' in a subshell so we don't clobber the current shell's
-# arguments. Must try -L first in case configure is actually a
-# symlink; some systems play weird games with the mod time of symlinks
-# (eg FreeBSD returns the mod time of the symlink's containing
-# directory).
-if (
- set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
- if test "$[*]" = "X"; then
- # -L didn't work.
- set X `ls -t $srcdir/configure conftest.file`
- fi
- rm -f conftest.file
- if test "$[*]" != "X $srcdir/configure conftest.file" \
- && test "$[*]" != "X conftest.file $srcdir/configure"; then
-
- # If neither matched, then we have a broken ls. This can happen
- # if, for instance, CONFIG_SHELL is bash and it inherits a
- # broken ls alias from the environment. This has actually
- # happened. Such a system could not be considered "sane".
- AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
-alias in your environment])
- fi
-
- test "$[2]" = conftest.file
- )
-then
- # Ok.
- :
-else
- AC_MSG_ERROR([newly created file is older than distributed files!
-Check your system clock])
-fi
-AC_MSG_RESULT(yes)])
-
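A short sketch of where AM_SANITY_CHECK fits: it is normally pulled in by AM_INIT_AUTOMAKE rather than called directly, and a machine whose clock lags the source timestamps fails with the "newly created file is older than distributed files" error above. Calling it by hand from configure.in would simply be:

    AM_SANITY_CHECK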
-# AM_PROG_INSTALL_STRIP
-
-# Copyright (C) 2001, 2003 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# One issue with vendor `install' (even GNU) is that you can't
-# specify the program used to strip binaries. This is especially
-# annoying in cross-compiling environments, where the build's strip
-# is unlikely to handle the host's binaries.
-# Fortunately install-sh will honor a STRIPPROG variable, so we
-# always use install-sh in `make install-strip', and initialize
-# STRIPPROG with the value of the STRIP variable (set by the user).
-AC_DEFUN([AM_PROG_INSTALL_STRIP],
-[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
-# Installed binaries are usually stripped using `strip' when the user
-# runs `make install-strip'. However `strip' might not be the right
-# tool to use in cross-compilation environments, therefore Automake
-# will honor the `STRIP' environment variable to overrule this program.
-dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
-if test "$cross_compiling" != no; then
- AC_CHECK_TOOL([STRIP], [strip], :)
-fi
-INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s"
-AC_SUBST([INSTALL_STRIP_PROGRAM])])
-
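As a usage sketch (the host triplet and tool name are placeholders): once INSTALL_STRIP_PROGRAM is substituted, stripping installed binaries with the host toolchain in a cross build looks like

    ./configure --host=arm-linux   # AC_CHECK_TOOL should pick up arm-linux-strip as $STRIP
    make install-strip             # or override by hand: make install-strip STRIP=arm-linux-strip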
-# Check how to create a tarball. -*- Autoconf -*-
-
-# Copyright (C) 2004 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# serial 1
-
-
-# _AM_PROG_TAR(FORMAT)
-# --------------------
-# Check how to create a tarball in format FORMAT.
-# FORMAT should be one of `v7', `ustar', or `pax'.
-#
-# Substitute a variable $(am__tar) that is a command
-# writing to stdout a FORMAT-tarball containing the directory
-# $tardir.
-# tardir=directory && $(am__tar) > result.tar
-#
-# Substitute a variable $(am__untar) that extracts such
-# a tarball read from stdin.
-# $(am__untar) < result.tar
-AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
-m4_if([$1], [v7],
- [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
- [m4_case([$1], [ustar],, [pax],,
- [m4_fatal([Unknown tar format])])
-AC_MSG_CHECKING([how to create a $1 tar archive])
-# Loop over all known methods to create a tar archive until one works.
-_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
-_am_tools=${am_cv_prog_tar_$1-$_am_tools}
-# Do not fold the above two lines into one, because Tru64 sh and
-# Solaris sh will not grok spaces in the rhs of `-'.
-for _am_tool in $_am_tools
-do
- case $_am_tool in
- gnutar)
- for _am_tar in tar gnutar gtar;
- do
- AM_RUN_LOG([$_am_tar --version]) && break
- done
- am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
- am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
- am__untar="$_am_tar -xf -"
- ;;
- plaintar)
- # Must skip GNU tar: if it does not support --format= it doesn't create
- # a ustar tarball either.
- (tar --version) >/dev/null 2>&1 && continue
- am__tar='tar chf - "$$tardir"'
- am__tar_='tar chf - "$tardir"'
- am__untar='tar xf -'
- ;;
- pax)
- am__tar='pax -L -x $1 -w "$$tardir"'
- am__tar_='pax -L -x $1 -w "$tardir"'
- am__untar='pax -r'
- ;;
- cpio)
- am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
- am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
- am__untar='cpio -i -H $1 -d'
- ;;
- none)
- am__tar=false
- am__tar_=false
- am__untar=false
- ;;
- esac
-
- # If the value was cached, stop now. We just wanted to have am__tar
- # and am__untar set.
- test -n "${am_cv_prog_tar_$1}" && break
-
- # tar/untar a dummy directory, and stop if the command works
- rm -rf conftest.dir
- mkdir conftest.dir
- echo GrepMe > conftest.dir/file
- AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
- rm -rf conftest.dir
- if test -s conftest.tar; then
- AM_RUN_LOG([$am__untar <conftest.tar])
- grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
- fi
-done
-rm -rf conftest.dir
-
-AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
-AC_MSG_RESULT([$am_cv_prog_tar_$1])])
-AC_SUBST([am__tar])
-AC_SUBST([am__untar])
-]) # _AM_PROG_TAR
-
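For context, a hedged sketch of how _AM_PROG_TAR is normally reached and how the substituted commands are used; the tar-ustar route and the dist rule below follow the header comment and common automake practice, not anything specific to this commit:

    dnl configure.in: AM_INIT_AUTOMAKE's tar-ustar option expands to _AM_PROG_TAR([ustar])
    AM_INIT_AUTOMAKE([1.9 tar-ustar])
    dnl Makefile dist rule, exactly the pattern documented above:
    dnl   tardir=$(distdir) && $(am__tar) | gzip -c > $(distdir).tar.gz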
diff --git a/moses-cmd/acsite.m4 b/moses-cmd/acsite.m4
deleted file mode 100644
index 36ed5c2bf..000000000
--- a/moses-cmd/acsite.m4
+++ /dev/null
@@ -1,3 +0,0 @@
-builtin(include,config/mysql-client.m4)
-builtin(include,config/mysql++.m4)
-
diff --git a/moses-cmd/config.h.in b/moses-cmd/config.h.in
deleted file mode 100644
index 10bc1419c..000000000
--- a/moses-cmd/config.h.in
+++ /dev/null
@@ -1,58 +0,0 @@
-/* config.h.in. Generated from configure.in by autoheader. */
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* flag for IRSTLM */
-#undef HAVE_IRSTLM
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* flag for SRILM */
-#undef HAVE_SRILM
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
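A brief sketch of how these templates are produced and consumed, assuming the configure input shipped alongside this file: an AC_DEFINE such as

    AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])

is what makes autoheader emit the corresponding `#undef HAVE_SRILM' line above; after a successful check, the generated config.h defines HAVE_SRILM (with an empty value here), which sources then test with `#ifdef HAVE_SRILM'.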
diff --git a/moses-cmd/config.in b/moses-cmd/config.in
deleted file mode 100644
index 0f4b2d35b..000000000
--- a/moses-cmd/config.in
+++ /dev/null
@@ -1,93 +0,0 @@
-AC_INIT(src)
-
-AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(moses, 0.1)
-
-AC_PROG_CXX
-AC_LANG_CPLUSPLUS
-AC_PROG_RANLIB
-#AM_PROG_LIBTOOL
-
-AC_ARG_WITH(srilm,
- [AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
- [with_srilm=$withval],
- [with_srilm=no]
- )
-
-AC_ARG_WITH(boost,
- [AC_HELP_STRING([--with-boost=PATH], [path to BOOST libraries])],
- [with_boost=$withval],
- [with_boost=no]
- )
-
-AC_ARG_WITH(moses,
- [AC_HELP_STRING([--with-moses=PATH], [path to moses library])],
- [with_moses=$withval],
- [with_moses=no]
- )
-
-AC_ARG_ENABLE(mysql, [AC_HELP_STRING([--enable-mysql], [(optional) build in MySQL support])],
- [mysql_flag=yes], [mysql_flag=no])
-if test "x$with_boost" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_boost}"
- LDFLAGS="$LDFLAGS -L${with_boost}/lib -L${with_boost}/stage/lib"
-fi
-
-if test "x$with_moses" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_moses}/src"
- LDFLAGS="$LDFLAGS -L${with_moses}/src"
-fi
-LIBS="$LIBS -lmoses"
-
-AC_CHECK_HEADER([Manager.h], [], [AC_MSG_ERROR([Cannot find moses headers! Use --with-moses=PATH])])
-AC_CHECK_HEADER([boost/algorithm/string.hpp], [], [AC_MSG_ERROR([Cannot find boost. Use --with-boost=PATH])])
-AC_CHECK_HEADER([boost/iostreams/filtering_stream.hpp], [], [AC_MSG_ERROR([Cannot find boost. Use --with-boost=PATH])])
-
-if test "$mysql_flag" = 'yes'
-then
- AC_MYSQLCLIENT(,,
- [AC_ERROR([Could not locate mysql client libraries. Try --with-mysql-prefix/-include/-lib])])
- LDFLAGS="$LDFLAGS $MYSQLCLIENT_LDFLAGS"
- CPPFLAGS="$CPPFLAGS $MYSQLCLIENT_CPPFLAGS"
- LIBS="$LIBS $MYSQLCLIENT_LIBS"
-
- MYSQLPP_DEVEL
- LIBS="$LIBS -lmysqlpp"
-
- AM_CONDITIONAL([BUILD_MYSQL_SUPPORT], true)
-else
- AM_CONDITIONAL([BUILD_MYSQL_SUPPORT], false)
-fi
-
-if test "x$with_srilm" != 'xno'
-then
- SAVE_CPPFLAGS="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
-
- AC_CHECK_HEADER(Ngram.h,
- [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
- [AC_MSG_ERROR([Cannot find SRILM!])])
-
- LIB_SRILM="-loolm -ldstruct -lmisc"
- # ROOT/lib/i686-m64/liboolm.a
- # ROOT/lib/i686-m64/libdstruct.a
- # ROOT/lib/i686-m64/libmisc.a
- MY_ARCH=`${with_srilm}/sbin/machine-type`
- LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
- LIBS="$LIBS $LIB_SRILM"
- FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
- AM_CONDITIONAL([SRI_LM], true)
- AM_CONDITIONAL([INTERNAL_LM], false)
-else
- echo "Using internal language model (use --with-srilm to change)!"
- AM_CONDITIONAL([SRI_LM], false)
- AM_CONDITIONAL([INTERNAL_LM], true)
-fi
-
-LIBS="$LIBS -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz"
-FMT_LIBS="$FMT_LIBS libboost_iostreams.a libboost_filesystem.a libboost_thread.a"
-
-
-AC_OUTPUT(Makefile src/Makefile)
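A sketch of a configure invocation exercising the options declared in this file; all paths are placeholders:

    ./configure --with-moses=$HOME/mosesdecoder/moses \
                --with-boost=$HOME/boost_1_33_1 \
                --with-srilm=$HOME/srilm --enable-mysql

Without --with-srilm the internal language model branch is taken, as the script itself reports.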
diff --git a/moses-cmd/config/mysql++.m4 b/moses-cmd/config/mysql++.m4
deleted file mode 100644
index 862a881a1..000000000
--- a/moses-cmd/config/mysql++.m4
+++ /dev/null
@@ -1,130 +0,0 @@
-#-######################################################################
-# mysql++.m4 - Example autoconf macro showing how to find MySQL++
-# library and header files.
-#
-# Copyright (c) 2004-2005 by Educational Technology Resources, Inc.
-#
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published
-# by the Free Software Foundation; either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with MySQL++; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
-# USA
-#-######################################################################
-
-dnl @synopsis MYSQLPP_DEVEL
-dnl
-dnl This macro tries to find the MySQL++ library and header files.
-dnl
-dnl We define the following configure script flags:
-dnl
-dnl --with-mysqlpp: Give prefix for both library and headers, and try
-dnl to guess subdirectory names for each. (e.g. tack /lib and
-dnl /include onto given dir name, and other common schemes.)
-dnl --with-mysqlpp-lib: Similar to --with-mysqlpp, but for library only.
-dnl --with-mysqlpp-include: Similar to --with-mysqlpp, but for headers
-dnl only.
-dnl
-dnl This macro depends on having the default compiler and linker flags
-dnl set up for building programs against the MySQL C API. The mysql.m4
-dnl macro in this directory fits this bill; run it first.
-dnl
-dnl @version 1.0, 2005/07/13
-dnl @author Warren Young <mysqlpp@etr-usa.com>
-
-AC_DEFUN([MYSQLPP_DEVEL],
-[
-AC_CACHE_CHECK([for MySQL++ libraries], ac_cv_mysqlpp_devel,
-[
- #
- # Set up configure script macros
- #
- AC_ARG_WITH(mysqlpp,
- [ --with-mysqlpp=<path> path containing MySQL++ header and library subdirs],
- [MYSQLPP_lib_check="$with_mysqlpp/lib $with_mysqlpp/lib/mysql++"
- MYSQLPP_inc_check="$with_mysqlpp/include $with_mysqlpp/include/mysql++"],
- [MYSQLPP_lib_check="/usr/local/mysql++/lib /usr/local/lib/mysql++ /opt/mysql++/lib /usr/lib/mysql++ /usr/local/lib /usr/lib"
- MYSQLPP_inc_check="/usr/local/mysql++/include /usr/local/include/mysql++ /opt/mysql++/include /usr/local/include /usr/include/mysql++ /usr/include"])
- AC_ARG_WITH(mysqlpp-lib,
- [ --with-mysqlpp-lib=<path> directory path of MySQL++ library],
- [MYSQLPP_lib_check="$with_mysqlpp_lib $with_mysqlpp_lib/lib $with_mysqlpp_lib/lib/mysql"])
- AC_ARG_WITH(mysqlpp-include,
- [ --with-mysqlpp-include=<path> directory path of MySQL++ headers],
- [MYSQLPP_inc_check="$with_mysqlpp_include $with_mysqlpp_include/include $with_mysqlpp_include/include/mysql"])
-
- #
- # Look for MySQL++ library
- #
- MYSQLPP_libdir=
- for dir in $MYSQLPP_lib_check
- do
- if test -d "$dir" && \
- ( test -f "$dir/libmysqlpp.so" ||
- test -f "$dir/libmysqlpp.a" )
- then
- MYSQLPP_libdir=$dir
- break
- fi
- done
-
- if test -z "$MYSQLPP_libdir"
- then
- AC_MSG_ERROR([Didn't find the MySQL++ library dir in '$MYSQLPP_lib_check'])
- fi
-
- case "$MYSQLPP_libdir" in
- /* ) ;;
- * ) AC_MSG_ERROR([The MySQL++ library directory ($MYSQLPP_libdir) must be an absolute path.]) ;;
- esac
-
- AC_MSG_RESULT([lib in $MYSQLPP_libdir])
-
- case "$MYSQLPP_libdir" in
- /usr/lib) ;;
- *) LDFLAGS="$LDFLAGS -L${MYSQLPP_libdir} -Wl,-rpath ${MYSQLPP_libdir}" ;;
- esac
-
- #
- # Look for MySQL++ headers
- #
- AC_MSG_CHECKING([for MySQL++ include directory])
- MYSQLPP_incdir=
- for dir in $MYSQLPP_inc_check
- do
- if test -d "$dir" && test -f "$dir/mysql++.h"
- then
- MYSQLPP_incdir=$dir
- break
- fi
- done
-
- if test -z "$MYSQLPP_incdir"
- then
- AC_MSG_ERROR([Didn't find the MySQL++ header dir in '$MYSQLPP_inc_check'])
- fi
-
- case "$MYSQLPP_incdir" in
- /* ) ;;
- * ) AC_MSG_ERROR([The MySQL++ header directory ($MYSQLPP_incdir) must be an absolute path.]) ;;
- esac
-
- AC_MSG_RESULT([$MYSQLPP_incdir])
-
- CPPFLAGS="$CPPFLAGS -I${MYSQLPP_incdir}"
-
- AC_MSG_CHECKING([that we can build MySQL++ programs])
- AC_COMPILE_IFELSE(
- [AC_LANG_PROGRAM([#include <mysql++.h>],
- [std::string s; mysqlpp::escape_string(s)])],
- ac_cv_mysqlpp_devel=yes,
- AC_MSG_ERROR(no))
-])]) dnl End MYSQLPP_DEVEL
-
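A usage sketch matching the macro's own documentation: a configure.in that has already run the MySQL client check can call MYSQLPP_DEVEL and let users point it at a non-standard install. The install prefix below is illustrative:

    AC_MYSQLCLIENT
    MYSQLPP_DEVEL
    dnl user side:  ./configure --with-mysqlpp=/opt/mysql++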
diff --git a/moses-cmd/config/mysql-client.m4 b/moses-cmd/config/mysql-client.m4
deleted file mode 100644
index f12965c6b..000000000
--- a/moses-cmd/config/mysql-client.m4
+++ /dev/null
@@ -1,133 +0,0 @@
-dnl Test for libmysqlclient and
-dnl define MYSQLCLIENT_CPPFLAGS, MYSQLCLIENT_LDFLAGS and MYSQLCLIENT_LIBS
-dnl usage:
-dnl AC_MYSQLCLIENT(
-dnl [MINIMUM-VERSION,
-dnl [ACTION-IF-FOUND [,
-dnl ACTION-IF-NOT-FOUND ]]])
-dnl
-
-AC_DEFUN(AC_MYSQLCLIENT,
-[
-AC_ARG_WITH(mysqlclient-prefix,
- [ --with-mysqlclient-prefix=PFX Prefix where mysqlclient is
-installed],
- mysqlclient_prefix="$withval",
- mysqlclient_prefix="")
-
-AC_ARG_WITH(mysqlclient-include, [ --with-mysqlclient-include=DIR Directory pointing
- to mysqlclient include files],
- mysqlclient_include="$withval",
- mysqlclient_include="")
-
-AC_ARG_WITH(mysqlclient-lib,
-[ --with-mysqlclient-lib=LIB Directory pointing to mysqlclient library
- (Note: -include and -lib do override
- paths found with -prefix)
-],
- mysqlclient_lib="$withval",
- mysqlclient_lib="")
-
- AC_MSG_CHECKING([for mysqlclient ifelse([$1], , ,[>= v$1])])
- MYSQLCLIENT_LDFLAGS=""
- MYSQLCLIENT_CPPFLAGS=""
- MYSQLCLIENT_LIBS="-lmysqlclient"
- mysqlclient_fail=""
-
- dnl test --with-mysqlclient-prefix
- for tryprefix in /usr /usr/local /usr/mysql /usr/local/mysql /usr/pkg $mysqlclient_prefix; do
- #testloop
- for hloc in lib/mysql lib lib64/mysql lib64 ; do
- if test -e "$tryprefix/$hloc/libmysqlclient.so"; then
- MYSQLCLIENT_LDFLAGS="-L$tryprefix/$hloc"
- fi
- done
-
- for iloc in include/mysql include; do
- if test -e "$tryprefix/$iloc/mysql.h"; then
- MYSQLCLIENT_CPPFLAGS="-I$tryprefix/$iloc"
- fi
- done
- # testloop
- done
-
- dnl test --with-mysqlclient-include
- if test "x$mysqlclient_include" != "x" ; then
- echo "checking for mysql includes... "
- if test -d "$mysqlclient_include/mysql" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include"
- echo " found $MYSQLCLIENT_CPPFLAGS"
- elif test -d "$mysqlclient_include/include/mysql" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include/include"
- echo " found $MYSQLCLIENT_CPPFLAGS"
- elif test -d "$mysqlclient_include" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include"
- echo "found $MYSQLCLIENT_CPPFLAGS"
- else
- echo "not found! no include dir found in $mysqlclient_include"
- fi
- fi
-
- dnl test --with-mysqlclient-lib
- if test "x$mysqlclient_lib" != "x" ; then
- echo "checking for mysql libs... "
- if test -d "$mysqlclient_lib/lib/mysql" ; then
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib/lib/mysql"
- echo "found $MYSQLCLIENT_LDFLAGS"
- elif test -d "$mysqlclient_lib/lib" ; then
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib/lib"
- echo "found $MYSQLCLIENT_LDFLAGS"
- else
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib"
- echo "defaulted to $MYSQLCLIENT_LDFLAGS"
- fi
- fi
-
- ac_save_CPPFLAGS="$CPPFLAGS"
- ac_save_LDFLAGS="$LDFLAGS"
- ac_save_LIBS="$LIBS"
- CPPFLAGS="$CPPFLAGS $MYSQLCLIENT_CPPFLAGS"
- LDFLAGS="$LDFLAGS $MYSQLCLIENT_LDFLAGS"
- LIBS="$LIBS $MYSQLCLIENT_LIBS"
- dnl if no minimum version is given, just try to compile
- dnl else try to compile AND run
- AC_TRY_COMPILE([
- #include <mysql.h>
- ],[
- mysql_real_connect( 0, 0, 0, 0, 0, 0, 0, 0);
- ], [AC_MSG_RESULT(yes $MYSQLCLIENT_CPPFLAGS $MYSQLCLIENT_LDFLAGS)
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
- ifelse([$2], ,:,[$2])
- ],[
- echo "no"
- echo "can't compile a simple app with mysql_connect in it.
-bad."
- mysqlclient_fail="yes"
- ])
-
- if test "x$mysqlclient_fail" != "x" ; then
- dnl AC_MSG_RESULT(no)
- echo
- echo "***"
- echo "*** mysqlclient test source had problems, check your config.log ."
- echo "*** Also try one of the following switches :"
- echo "*** --with-mysqlclient-prefix=PFX"
- echo "*** --with-mysqlclient-include=DIR"
- echo "*** --with-mysqlclient-lib=DIR"
- echo "***"
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
- ifelse([$3], ,:,[$3])
- fi
-
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
- AC_SUBST(MYSQLCLIENT_LDFLAGS)
- AC_SUBST(MYSQLCLIENT_CPPFLAGS)
- AC_SUBST(MYSQLCLIENT_LIBS)
-])
-
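Finally, a sketch of the calling convention documented at the top of this macro; the HAVE_MYSQL symbol and the warning text are illustrative, not taken from this repository:

    AC_MYSQLCLIENT([],
                   [AC_DEFINE([HAVE_MYSQL], [1], [mysqlclient is usable])],
                   [AC_MSG_WARN([building without MySQL support])])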
diff --git a/moses-cmd/configure b/moses-cmd/configure
deleted file mode 100755
index facfeff35..000000000
--- a/moses-cmd/configure
+++ /dev/null
@@ -1,5846 +0,0 @@
-#! /bin/sh
-# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59.
-#
-# Copyright (C) 2003 Free Software Foundation, Inc.
-# This configure script is free software; the Free Software Foundation
-# gives unlimited permission to copy, distribute and modify it.
-## --------------------- ##
-## M4sh Initialization. ##
-## --------------------- ##
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x perform word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
- set -o posix
-fi
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# Support unset when possible.
-if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
- as_unset=unset
-else
- as_unset=false
-fi
-
-
-# Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-for as_var in \
- LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
- LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
- LC_TELEPHONE LC_TIME
-do
- if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
- eval $as_var=C; export $as_var
- else
- $as_unset $as_var
- fi
-done
-
-# Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-
-# Name of the executable.
-as_me=`$as_basename "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)$' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
- /^X\/\(\/\/\)$/{ s//\1/; q; }
- /^X\/\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
-
-
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- echo "#! /bin/sh" >conf$$.sh
- echo "exit 0" >>conf$$.sh
- chmod +x conf$$.sh
- if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
- PATH_SEPARATOR=';'
- else
- PATH_SEPARATOR=:
- fi
- rm -f conf$$.sh
-fi
-
-
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" || {
- # Find who we are. Look in the path if we contain no path at all
- # relative or not.
- case $0 in
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
-
- ;;
- esac
- # We did not find ourselves, most probably we were run as `sh COMMAND'
- # in which case we are not to be found in the path.
- if test "x$as_myself" = x; then
- as_myself=$0
- fi
- if test ! -f "$as_myself"; then
- { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
- { (exit 1); exit 1; }; }
- fi
- case $CONFIG_SHELL in
- '')
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for as_base in sh bash ksh sh5; do
- case $as_dir in
- /*)
- if ("$as_dir/$as_base" -c '
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
- $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
- $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
- CONFIG_SHELL=$as_dir/$as_base
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$0" ${1+"$@"}
- fi;;
- esac
- done
-done
-;;
- esac
-
- # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
- # uniformly replaced by the line number. The first 'sed' inserts a
- # line-number line before each line; the second 'sed' does the real
- # work. The second script uses 'N' to pair each line-number line
- # with the numbered line, and appends trailing '-' during
- # substitution so that $LINENO is not a special case at line end.
- # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
- # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
- sed '=' <$as_myself |
- sed '
- N
- s,$,-,
- : loop
- s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
- t loop
- s,-$,,
- s,^['$as_cr_digits']*\n,,
- ' >$as_me.lineno &&
- chmod +x $as_me.lineno ||
- { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
- { (exit 1); exit 1; }; }
-
- # Don't try to exec as it changes $[0], causing all sorts of problems
- # (the dirname of $[0] is not the place where we might find the
- # original, and so on). Autoconf is especially sensitive to this.
- . ./$as_me.lineno
- # Exit status is that of the last command.
- exit
-}
-
-
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
- *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T=' ' ;;
- *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
- *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
-esac
-
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-rm -f conf$$ conf$$.exe conf$$.file
-echo >conf$$.file
-if ln -s conf$$.file conf$$ 2>/dev/null; then
- # We could just check for DJGPP; but this test a) works b) is more generic
- # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
- if test -f conf$$.exe; then
- # Don't use ln at all; we don't have any links
- as_ln_s='cp -p'
- else
- as_ln_s='ln -s'
- fi
-elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
-else
- as_ln_s='cp -p'
-fi
-rm -f conf$$ conf$$.exe conf$$.file
-
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p=:
-else
- test -d ./-p && rmdir ./-p
- as_mkdir_p=false
-fi
-
-as_executable_p="test -f"
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-
-
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" $as_nl"
-
-# CDPATH.
-$as_unset CDPATH
-
-
-# Name of the host.
-# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
-# so uname gets run too.
-ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
-
-exec 6>&1
-
-#
-# Initializations.
-#
-ac_default_prefix=/usr/local
-ac_config_libobj_dir=.
-cross_compiling=no
-subdirs=
-MFLAGS=
-MAKEFLAGS=
-SHELL=${CONFIG_SHELL-/bin/sh}
-
-# Maximum number of lines to put in a shell here document.
-# This variable seems obsolete. It should probably be removed, and
-# only ac_max_sed_lines should be used.
-: ${ac_max_here_lines=38}
-
-# Identity of this package.
-PACKAGE_NAME=
-PACKAGE_TARNAME=
-PACKAGE_VERSION=
-PACKAGE_STRING=
-PACKAGE_BUGREPORT=
-
-ac_unique_file="src"
-# Factoring default headers for most tests.
-ac_includes_default="\
-#include <stdio.h>
-#if HAVE_SYS_TYPES_H
-# include <sys/types.h>
-#endif
-#if HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-#if STDC_HEADERS
-# include <stdlib.h>
-# include <stddef.h>
-#else
-# if HAVE_STDLIB_H
-# include <stdlib.h>
-# endif
-#endif
-#if HAVE_STRING_H
-# if !STDC_HEADERS && HAVE_MEMORY_H
-# include <memory.h>
-# endif
-# include <string.h>
-#endif
-#if HAVE_STRINGS_H
-# include <strings.h>
-#endif
-#if HAVE_INTTYPES_H
-# include <inttypes.h>
-#else
-# if HAVE_STDINT_H
-# include <stdint.h>
-# endif
-#endif
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#endif"
-
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CXX CXXFLAGS LDFLAGS CPPFLAGS ac_ct_CXX EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE RANLIB ac_ct_RANLIB CXXCPP EGREP MYSQLCLIENT_LDFLAGS MYSQLCLIENT_CPPFLAGS MYSQLCLIENT_LIBS BUILD_MYSQL_SUPPORT_TRUE BUILD_MYSQL_SUPPORT_FALSE SRI_LM_TRUE SRI_LM_FALSE INTERNAL_LM_TRUE INTERNAL_LM_FALSE IRST_LM_TRUE IRST_LM_FALSE LIBOBJS LTLIBOBJS'
-ac_subst_files=''
-
-# Initialize some variables set by options.
-ac_init_help=
-ac_init_version=false
-# The variables have the same names as the options, with
-# dashes changed to underlines.
-cache_file=/dev/null
-exec_prefix=NONE
-no_create=
-no_recursion=
-prefix=NONE
-program_prefix=NONE
-program_suffix=NONE
-program_transform_name=s,x,x,
-silent=
-site=
-srcdir=
-verbose=
-x_includes=NONE
-x_libraries=NONE
-
-# Installation directory options.
-# These are left unexpanded so users can "make install exec_prefix=/foo"
-# and all the variables that are supposed to be based on exec_prefix
-# by default will actually change.
-# Use braces instead of parens because sh, perl, etc. also accept them.
-bindir='${exec_prefix}/bin'
-sbindir='${exec_prefix}/sbin'
-libexecdir='${exec_prefix}/libexec'
-datadir='${prefix}/share'
-sysconfdir='${prefix}/etc'
-sharedstatedir='${prefix}/com'
-localstatedir='${prefix}/var'
-libdir='${exec_prefix}/lib'
-includedir='${prefix}/include'
-oldincludedir='/usr/include'
-infodir='${prefix}/info'
-mandir='${prefix}/man'
-
-ac_prev=
-for ac_option
-do
- # If the previous option needs an argument, assign it.
- if test -n "$ac_prev"; then
- eval "$ac_prev=\$ac_option"
- ac_prev=
- continue
- fi
-
- ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
-
- # Accept the important Cygnus configure options, so we can diagnose typos.
-
- case $ac_option in
-
- -bindir | --bindir | --bindi | --bind | --bin | --bi)
- ac_prev=bindir ;;
- -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
- bindir=$ac_optarg ;;
-
- -build | --build | --buil | --bui | --bu)
- ac_prev=build_alias ;;
- -build=* | --build=* | --buil=* | --bui=* | --bu=*)
- build_alias=$ac_optarg ;;
-
- -cache-file | --cache-file | --cache-fil | --cache-fi \
- | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
- ac_prev=cache_file ;;
- -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
- | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
- cache_file=$ac_optarg ;;
-
- --config-cache | -C)
- cache_file=config.cache ;;
-
- -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
- ac_prev=datadir ;;
- -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
- | --da=*)
- datadir=$ac_optarg ;;
-
- -disable-* | --disable-*)
- ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid feature name: $ac_feature" >&2
- { (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/-/_/g'`
- eval "enable_$ac_feature=no" ;;
-
- -enable-* | --enable-*)
- ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid feature name: $ac_feature" >&2
- { (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/-/_/g'`
- case $ac_option in
- *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
- *) ac_optarg=yes ;;
- esac
- eval "enable_$ac_feature='$ac_optarg'" ;;
-
- -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
- | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
- | --exec | --exe | --ex)
- ac_prev=exec_prefix ;;
- -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
- | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
- | --exec=* | --exe=* | --ex=*)
- exec_prefix=$ac_optarg ;;
-
- -gas | --gas | --ga | --g)
- # Obsolete; use --with-gas.
- with_gas=yes ;;
-
- -help | --help | --hel | --he | -h)
- ac_init_help=long ;;
- -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
- ac_init_help=recursive ;;
- -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
- ac_init_help=short ;;
-
- -host | --host | --hos | --ho)
- ac_prev=host_alias ;;
- -host=* | --host=* | --hos=* | --ho=*)
- host_alias=$ac_optarg ;;
-
- -includedir | --includedir | --includedi | --included | --include \
- | --includ | --inclu | --incl | --inc)
- ac_prev=includedir ;;
- -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
- | --includ=* | --inclu=* | --incl=* | --inc=*)
- includedir=$ac_optarg ;;
-
- -infodir | --infodir | --infodi | --infod | --info | --inf)
- ac_prev=infodir ;;
- -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
- infodir=$ac_optarg ;;
-
- -libdir | --libdir | --libdi | --libd)
- ac_prev=libdir ;;
- -libdir=* | --libdir=* | --libdi=* | --libd=*)
- libdir=$ac_optarg ;;
-
- -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
- | --libexe | --libex | --libe)
- ac_prev=libexecdir ;;
- -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
- | --libexe=* | --libex=* | --libe=*)
- libexecdir=$ac_optarg ;;
-
- -localstatedir | --localstatedir | --localstatedi | --localstated \
- | --localstate | --localstat | --localsta | --localst \
- | --locals | --local | --loca | --loc | --lo)
- ac_prev=localstatedir ;;
- -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
- | --localstate=* | --localstat=* | --localsta=* | --localst=* \
- | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
- localstatedir=$ac_optarg ;;
-
- -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
- ac_prev=mandir ;;
- -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
- mandir=$ac_optarg ;;
-
- -nfp | --nfp | --nf)
- # Obsolete; use --without-fp.
- with_fp=no ;;
-
- -no-create | --no-create | --no-creat | --no-crea | --no-cre \
- | --no-cr | --no-c | -n)
- no_create=yes ;;
-
- -no-recursion | --no-recursion | --no-recursio | --no-recursi \
- | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
- no_recursion=yes ;;
-
- -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
- | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
- | --oldin | --oldi | --old | --ol | --o)
- ac_prev=oldincludedir ;;
- -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
- | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
- | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
- oldincludedir=$ac_optarg ;;
-
- -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
- ac_prev=prefix ;;
- -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
- prefix=$ac_optarg ;;
-
- -program-prefix | --program-prefix | --program-prefi | --program-pref \
- | --program-pre | --program-pr | --program-p)
- ac_prev=program_prefix ;;
- -program-prefix=* | --program-prefix=* | --program-prefi=* \
- | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
- program_prefix=$ac_optarg ;;
-
- -program-suffix | --program-suffix | --program-suffi | --program-suff \
- | --program-suf | --program-su | --program-s)
- ac_prev=program_suffix ;;
- -program-suffix=* | --program-suffix=* | --program-suffi=* \
- | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
- program_suffix=$ac_optarg ;;
-
- -program-transform-name | --program-transform-name \
- | --program-transform-nam | --program-transform-na \
- | --program-transform-n | --program-transform- \
- | --program-transform | --program-transfor \
- | --program-transfo | --program-transf \
- | --program-trans | --program-tran \
- | --progr-tra | --program-tr | --program-t)
- ac_prev=program_transform_name ;;
- -program-transform-name=* | --program-transform-name=* \
- | --program-transform-nam=* | --program-transform-na=* \
- | --program-transform-n=* | --program-transform-=* \
- | --program-transform=* | --program-transfor=* \
- | --program-transfo=* | --program-transf=* \
- | --program-trans=* | --program-tran=* \
- | --progr-tra=* | --program-tr=* | --program-t=*)
- program_transform_name=$ac_optarg ;;
-
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- silent=yes ;;
-
- -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
- ac_prev=sbindir ;;
- -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
- | --sbi=* | --sb=*)
- sbindir=$ac_optarg ;;
-
- -sharedstatedir | --sharedstatedir | --sharedstatedi \
- | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
- | --sharedst | --shareds | --shared | --share | --shar \
- | --sha | --sh)
- ac_prev=sharedstatedir ;;
- -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
- | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
- | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
- | --sha=* | --sh=*)
- sharedstatedir=$ac_optarg ;;
-
- -site | --site | --sit)
- ac_prev=site ;;
- -site=* | --site=* | --sit=*)
- site=$ac_optarg ;;
-
- -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
- ac_prev=srcdir ;;
- -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
- srcdir=$ac_optarg ;;
-
- -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
- | --syscon | --sysco | --sysc | --sys | --sy)
- ac_prev=sysconfdir ;;
- -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
- | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
- sysconfdir=$ac_optarg ;;
-
- -target | --target | --targe | --targ | --tar | --ta | --t)
- ac_prev=target_alias ;;
- -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
- target_alias=$ac_optarg ;;
-
- -v | -verbose | --verbose | --verbos | --verbo | --verb)
- verbose=yes ;;
-
- -version | --version | --versio | --versi | --vers | -V)
- ac_init_version=: ;;
-
- -with-* | --with-*)
- ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid package name: $ac_package" >&2
- { (exit 1); exit 1; }; }
- ac_package=`echo $ac_package| sed 's/-/_/g'`
- case $ac_option in
- *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
- *) ac_optarg=yes ;;
- esac
- eval "with_$ac_package='$ac_optarg'" ;;
-
- -without-* | --without-*)
- ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid package name: $ac_package" >&2
- { (exit 1); exit 1; }; }
- ac_package=`echo $ac_package | sed 's/-/_/g'`
- eval "with_$ac_package=no" ;;
-
- --x)
- # Obsolete; use --with-x.
- with_x=yes ;;
-
- -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
- | --x-incl | --x-inc | --x-in | --x-i)
- ac_prev=x_includes ;;
- -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
- | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
- x_includes=$ac_optarg ;;
-
- -x-libraries | --x-libraries | --x-librarie | --x-librari \
- | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
- ac_prev=x_libraries ;;
- -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
- | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
- x_libraries=$ac_optarg ;;
-
- -*) { echo "$as_me: error: unrecognized option: $ac_option
-Try \`$0 --help' for more information." >&2
- { (exit 1); exit 1; }; }
- ;;
-
- *=*)
- ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
- # Reject names that are not valid shell variable names.
- expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
- { (exit 1); exit 1; }; }
- ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
- eval "$ac_envvar='$ac_optarg'"
- export $ac_envvar ;;
-
- *)
- # FIXME: should be removed in autoconf 3.0.
- echo "$as_me: WARNING: you should use --build, --host, --target" >&2
- expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
- echo "$as_me: WARNING: invalid host type: $ac_option" >&2
- : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
- ;;
-
- esac
-done
-
-if test -n "$ac_prev"; then
- ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- { echo "$as_me: error: missing argument to $ac_option" >&2
- { (exit 1); exit 1; }; }
-fi
-
-# Be sure to have absolute paths.
-for ac_var in exec_prefix prefix
-do
- eval ac_val=$`echo $ac_var`
- case $ac_val in
- [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
- *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# Be sure to have absolute paths.
-for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
- localstatedir libdir includedir oldincludedir infodir mandir
-do
- eval ac_val=$`echo $ac_var`
- case $ac_val in
- [\\/$]* | ?:[\\/]* ) ;;
- *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# There might be people who depend on the old broken behavior: `$host'
-# used to hold the argument of --host etc.
-# FIXME: To remove some day.
-build=$build_alias
-host=$host_alias
-target=$target_alias
-
-# FIXME: To remove some day.
-if test "x$host_alias" != x; then
- if test "x$build_alias" = x; then
- cross_compiling=maybe
- echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
- If a cross compiler is detected then cross compile mode will be used." >&2
- elif test "x$build_alias" != "x$host_alias"; then
- cross_compiling=yes
- fi
-fi
-
-ac_tool_prefix=
-test -n "$host_alias" && ac_tool_prefix=$host_alias-
-
-test "$silent" = yes && exec 6>/dev/null
-
-
-# Find the source files, if location was not specified.
-if test -z "$srcdir"; then
- ac_srcdir_defaulted=yes
- # Try the directory containing this script, then its parent.
- ac_confdir=`(dirname "$0") 2>/dev/null ||
-$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$0" : 'X\(//\)[^/]' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$0" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- srcdir=$ac_confdir
- if test ! -r $srcdir/$ac_unique_file; then
- srcdir=..
- fi
-else
- ac_srcdir_defaulted=no
-fi
-if test ! -r $srcdir/$ac_unique_file; then
- if test "$ac_srcdir_defaulted" = yes; then
- { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
- { (exit 1); exit 1; }; }
- else
- { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
- { (exit 1); exit 1; }; }
- fi
-fi
-(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
- { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
- { (exit 1); exit 1; }; }
-srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
-ac_env_build_alias_set=${build_alias+set}
-ac_env_build_alias_value=$build_alias
-ac_cv_env_build_alias_set=${build_alias+set}
-ac_cv_env_build_alias_value=$build_alias
-ac_env_host_alias_set=${host_alias+set}
-ac_env_host_alias_value=$host_alias
-ac_cv_env_host_alias_set=${host_alias+set}
-ac_cv_env_host_alias_value=$host_alias
-ac_env_target_alias_set=${target_alias+set}
-ac_env_target_alias_value=$target_alias
-ac_cv_env_target_alias_set=${target_alias+set}
-ac_cv_env_target_alias_value=$target_alias
-ac_env_CXX_set=${CXX+set}
-ac_env_CXX_value=$CXX
-ac_cv_env_CXX_set=${CXX+set}
-ac_cv_env_CXX_value=$CXX
-ac_env_CXXFLAGS_set=${CXXFLAGS+set}
-ac_env_CXXFLAGS_value=$CXXFLAGS
-ac_cv_env_CXXFLAGS_set=${CXXFLAGS+set}
-ac_cv_env_CXXFLAGS_value=$CXXFLAGS
-ac_env_LDFLAGS_set=${LDFLAGS+set}
-ac_env_LDFLAGS_value=$LDFLAGS
-ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
-ac_cv_env_LDFLAGS_value=$LDFLAGS
-ac_env_CPPFLAGS_set=${CPPFLAGS+set}
-ac_env_CPPFLAGS_value=$CPPFLAGS
-ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
-ac_cv_env_CPPFLAGS_value=$CPPFLAGS
-ac_env_CXXCPP_set=${CXXCPP+set}
-ac_env_CXXCPP_value=$CXXCPP
-ac_cv_env_CXXCPP_set=${CXXCPP+set}
-ac_cv_env_CXXCPP_value=$CXXCPP
-
-#
-# Report the --help message.
-#
-if test "$ac_init_help" = "long"; then
- # Omit some internal or obsolete options to make the list less imposing.
- # This message is too long to be a string in the A/UX 3.1 sh.
- cat <<_ACEOF
-\`configure' configures this package to adapt to many kinds of systems.
-
-Usage: $0 [OPTION]... [VAR=VALUE]...
-
-To assign environment variables (e.g., CC, CFLAGS...), specify them as
-VAR=VALUE. See below for descriptions of some of the useful variables.
-
-Defaults for the options are specified in brackets.
-
-Configuration:
- -h, --help display this help and exit
- --help=short display options specific to this package
- --help=recursive display the short help of all the included packages
- -V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking...' messages
- --cache-file=FILE cache test results in FILE [disabled]
- -C, --config-cache alias for \`--cache-file=config.cache'
- -n, --no-create do not create output files
- --srcdir=DIR find the sources in DIR [configure dir or \`..']
-
-_ACEOF
-
- cat <<_ACEOF
-Installation directories:
- --prefix=PREFIX install architecture-independent files in PREFIX
- [$ac_default_prefix]
- --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
- [PREFIX]
-
-By default, \`make install' will install all the files in
-\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
-an installation prefix other than \`$ac_default_prefix' using \`--prefix',
-for instance \`--prefix=\$HOME'.
-
-For better control, use the options below.
-
-Fine tuning of the installation directories:
- --bindir=DIR user executables [EPREFIX/bin]
- --sbindir=DIR system admin executables [EPREFIX/sbin]
- --libexecdir=DIR program executables [EPREFIX/libexec]
- --datadir=DIR read-only architecture-independent data [PREFIX/share]
- --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
- --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
- --localstatedir=DIR modifiable single-machine data [PREFIX/var]
- --libdir=DIR object code libraries [EPREFIX/lib]
- --includedir=DIR C header files [PREFIX/include]
- --oldincludedir=DIR C header files for non-gcc [/usr/include]
- --infodir=DIR info documentation [PREFIX/info]
- --mandir=DIR man documentation [PREFIX/man]
-_ACEOF
-
- cat <<\_ACEOF
-
-Program names:
- --program-prefix=PREFIX prepend PREFIX to installed program names
- --program-suffix=SUFFIX append SUFFIX to installed program names
- --program-transform-name=PROGRAM run sed PROGRAM on installed program names
-_ACEOF
-fi
-
-if test -n "$ac_init_help"; then
-
- cat <<\_ACEOF
-
-Optional Features:
- --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
- --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
- --disable-dependency-tracking speeds up one-time build
- --enable-dependency-tracking do not reject slow dependency extractors
- --enable-profiling moses will dump profiling info
- --enable-optimization compile with -O3 flag
- --enable-mysql (optional) build in MySQL support
-
-Optional Packages:
- --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
- --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
- --with-srilm=PATH (optional) path to SRI's LM toolkit
- --with-boost=PATH path to BOOST libraries
- --with-irstlm=PATH (optional) path to IRST's LM toolkit
- --with-moses=PATH path to moses library
- --with-mysqlclient-prefix=PFX Prefix where mysqlclient is
-installed
- --with-mysqlclient-include=DIR Directory pointing
- to mysqlclient include files
- --with-mysqlclient-lib=LIB Directory pointing to mysqlclient library
- (Note: -include and -lib do override
- paths found with -prefix)
-
- --with-mysqlpp=<path> path containing MySQL++ header and library subdirs
- --with-mysqlpp-lib=<path> directory path of MySQL++ library
- --with-mysqlpp-include=<path> directory path of MySQL++ headers
-
-Some influential environment variables:
- CXX C++ compiler command
- CXXFLAGS C++ compiler flags
- LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
- nonstandard directory <lib dir>
- CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
- headers in a nonstandard directory <include dir>
- CXXCPP C++ preprocessor
-
-Use these variables to override the choices made by `configure' or to help
-it to find libraries and programs with nonstandard names/locations.
-
-_ACEOF
-fi
-
-if test "$ac_init_help" = "recursive"; then
- # If there are subdirs, report their specific --help.
- ac_popdir=`pwd`
- for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
- test -d $ac_dir || continue
- ac_builddir=.
-
-if test "$ac_dir" != .; then
- ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
- # A "../" for each directory in $ac_dir_suffix.
- ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
- ac_dir_suffix= ac_top_builddir=
-fi
-
-case $srcdir in
- .) # No --srcdir option. We are building in place.
- ac_srcdir=.
- if test -z "$ac_top_builddir"; then
- ac_top_srcdir=.
- else
- ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
- fi ;;
- [\\/]* | ?:[\\/]* ) # Absolute path.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir ;;
- *) # Relative path.
- ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
-
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
- case "$ac_dir" in
- .) ac_abs_builddir=`pwd`;;
- [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
- *) ac_abs_builddir=`pwd`/"$ac_dir";;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
- case ${ac_top_builddir}. in
- .) ac_abs_top_builddir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
- *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
- case $ac_srcdir in
- .) ac_abs_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
- *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
- case $ac_top_srcdir in
- .) ac_abs_top_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
- *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
- esac;;
-esac
-
- cd $ac_dir
- # Check for guested configure; otherwise get Cygnus style configure.
- if test -f $ac_srcdir/configure.gnu; then
- echo
- $SHELL $ac_srcdir/configure.gnu --help=recursive
- elif test -f $ac_srcdir/configure; then
- echo
- $SHELL $ac_srcdir/configure --help=recursive
- elif test -f $ac_srcdir/configure.ac ||
- test -f $ac_srcdir/configure.in; then
- echo
- $ac_configure --help
- else
- echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
- fi
- cd $ac_popdir
- done
-fi
-
-test -n "$ac_init_help" && exit 0
-if $ac_init_version; then
- cat <<\_ACEOF
-
-Copyright (C) 2003 Free Software Foundation, Inc.
-This configure script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it.
-_ACEOF
- exit 0
-fi
-exec 5>config.log
-cat >&5 <<_ACEOF
-This file contains any messages produced by compilers while
-running configure, to aid debugging if configure makes a mistake.
-
-It was created by $as_me, which was
-generated by GNU Autoconf 2.59. Invocation command line was
-
- $ $0 $@
-
-_ACEOF
-{
-cat <<_ASUNAME
-## --------- ##
-## Platform. ##
-## --------- ##
-
-hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
-
-/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
-hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
-/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
-/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
-
-_ASUNAME
-
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- echo "PATH: $as_dir"
-done
-
-} >&5
-
-cat >&5 <<_ACEOF
-
-
-## ----------- ##
-## Core tests. ##
-## ----------- ##
-
-_ACEOF
-
-
-# Keep a trace of the command line.
-# Strip out --no-create and --no-recursion so they do not pile up.
-# Strip out --silent because we don't want to record it for future runs.
-# Also quote any args containing shell meta-characters.
-# Make two passes to allow for proper duplicate-argument suppression.
-ac_configure_args=
-ac_configure_args0=
-ac_configure_args1=
-ac_sep=
-ac_must_keep_next=false
-for ac_pass in 1 2
-do
- for ac_arg
- do
- case $ac_arg in
- -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- continue ;;
- *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
- ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
- esac
- case $ac_pass in
- 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
- 2)
- ac_configure_args1="$ac_configure_args1 '$ac_arg'"
- if test $ac_must_keep_next = true; then
- ac_must_keep_next=false # Got value, back to normal.
- else
- case $ac_arg in
- *=* | --config-cache | -C | -disable-* | --disable-* \
- | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
- | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
- | -with-* | --with-* | -without-* | --without-* | --x)
- case "$ac_configure_args0 " in
- "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
- esac
- ;;
- -* ) ac_must_keep_next=true ;;
- esac
- fi
- ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
- # Get rid of the leading space.
- ac_sep=" "
- ;;
- esac
- done
-done
-$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
-$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
-
-# When interrupted or exit'd, clean up temporary files and complete
-# config.log. We remove comments because the quotes in them would
-# otherwise cause problems or look ugly.
-# WARNING: Be sure not to use single quotes in there, as some shells,
-# such as our DU 5.0 friend, will then `close' the trap.
-trap 'exit_status=$?
- # Save into config.log some information that might help in debugging.
- {
- echo
-
- cat <<\_ASBOX
-## ---------------- ##
-## Cache variables. ##
-## ---------------- ##
-_ASBOX
- echo
- # The following way of writing the cache mishandles newlines in values,
-{
- (set) 2>&1 |
- case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
- *ac_space=\ *)
- sed -n \
- "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
- ;;
- *)
- sed -n \
- "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
- ;;
- esac;
-}
- echo
-
- cat <<\_ASBOX
-## ----------------- ##
-## Output variables. ##
-## ----------------- ##
-_ASBOX
- echo
- for ac_var in $ac_subst_vars
- do
- eval ac_val=$`echo $ac_var`
- echo "$ac_var='"'"'$ac_val'"'"'"
- done | sort
- echo
-
- if test -n "$ac_subst_files"; then
- cat <<\_ASBOX
-## ------------- ##
-## Output files. ##
-## ------------- ##
-_ASBOX
- echo
- for ac_var in $ac_subst_files
- do
- eval ac_val=$`echo $ac_var`
- echo "$ac_var='"'"'$ac_val'"'"'"
- done | sort
- echo
- fi
-
- if test -s confdefs.h; then
- cat <<\_ASBOX
-## ----------- ##
-## confdefs.h. ##
-## ----------- ##
-_ASBOX
- echo
- sed "/^$/d" confdefs.h | sort
- echo
- fi
- test "$ac_signal" != 0 &&
- echo "$as_me: caught signal $ac_signal"
- echo "$as_me: exit $exit_status"
- } >&5
- rm -f core *.core &&
- rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
- exit $exit_status
- ' 0
-for ac_signal in 1 2 13 15; do
- trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
-done
-ac_signal=0
-
-# confdefs.h avoids OS command line length limits that DEFS can exceed.
-rm -rf conftest* confdefs.h
-# AIX cpp loses on an empty file, so make sure it contains at least a newline.
-echo >confdefs.h
-
-# Predefined preprocessor variables.
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_NAME "$PACKAGE_NAME"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_VERSION "$PACKAGE_VERSION"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_STRING "$PACKAGE_STRING"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
-_ACEOF
-
-
-# Let the site file select an alternate cache file if it wants to.
-# Prefer explicitly selected file to automatically selected ones.
-if test -z "$CONFIG_SITE"; then
- if test "x$prefix" != xNONE; then
- CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
- else
- CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
- fi
-fi
-for ac_site_file in $CONFIG_SITE; do
- if test -r "$ac_site_file"; then
- { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
-echo "$as_me: loading site script $ac_site_file" >&6;}
- sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file"
- fi
-done
-
-if test -r "$cache_file"; then
- # Some versions of bash will fail to source /dev/null (special
- # files actually), so we avoid doing that.
- if test -f "$cache_file"; then
- { echo "$as_me:$LINENO: loading cache $cache_file" >&5
-echo "$as_me: loading cache $cache_file" >&6;}
- case $cache_file in
- [\\/]* | ?:[\\/]* ) . $cache_file;;
- *) . ./$cache_file;;
- esac
- fi
-else
- { echo "$as_me:$LINENO: creating cache $cache_file" >&5
-echo "$as_me: creating cache $cache_file" >&6;}
- >$cache_file
-fi
-
-# Check that the precious variables saved in the cache have kept the same
-# value.
-ac_cache_corrupted=false
-for ac_var in `(set) 2>&1 |
- sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
- eval ac_old_set=\$ac_cv_env_${ac_var}_set
- eval ac_new_set=\$ac_env_${ac_var}_set
- eval ac_old_val="\$ac_cv_env_${ac_var}_value"
- eval ac_new_val="\$ac_env_${ac_var}_value"
- case $ac_old_set,$ac_new_set in
- set,)
- { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
-echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,set)
- { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
-echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,);;
- *)
- if test "x$ac_old_val" != "x$ac_new_val"; then
- { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
-echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
- { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
-echo "$as_me: former value: $ac_old_val" >&2;}
- { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
-echo "$as_me: current value: $ac_new_val" >&2;}
- ac_cache_corrupted=:
- fi;;
- esac
- # Pass precious variables to config.status.
- if test "$ac_new_set" = set; then
- case $ac_new_val in
- *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
- ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
- *) ac_arg=$ac_var=$ac_new_val ;;
- esac
- case " $ac_configure_args " in
- *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
- *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
- esac
- fi
-done
-if $ac_cache_corrupted; then
- { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
-echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
-echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ac_config_headers="$ac_config_headers config.h"
-
-am__api_version="1.9"
-ac_aux_dir=
-for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
- if test -f $ac_dir/install-sh; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install-sh -c"
- break
- elif test -f $ac_dir/install.sh; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install.sh -c"
- break
- elif test -f $ac_dir/shtool; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/shtool install -c"
- break
- fi
-done
-if test -z "$ac_aux_dir"; then
- { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&5
-echo "$as_me: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&2;}
- { (exit 1); exit 1; }; }
-fi
-ac_config_guess="$SHELL $ac_aux_dir/config.guess"
-ac_config_sub="$SHELL $ac_aux_dir/config.sub"
-ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
-
-# Find a good install program. We prefer a C program (faster),
-# so one script is as good as another. But avoid the broken or
-# incompatible versions:
-# SysV /etc/install, /usr/sbin/install
-# SunOS /usr/etc/install
-# IRIX /sbin/install
-# AIX /bin/install
-# AmigaOS /C/install, which installs bootblocks on floppy discs
-# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
-# AFS /usr/afsws/bin/install, which mishandles nonexistent args
-# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
-# OS/2's system install, which has a completely different semantic
-# ./install, which can be erroneously created by make from ./install.sh.
-echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5
-echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6
-if test -z "$INSTALL"; then
-if test "${ac_cv_path_install+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- # Account for people who put trailing slashes in PATH elements.
-case $as_dir/ in
- ./ | .// | /cC/* | \
- /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
- ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \
- /usr/ucb/* ) ;;
- *)
- # OSF1 and SCO ODT 3.0 have their own names for install.
- # Don't use installbsd from OSF since it installs stuff as root
- # by default.
- for ac_prog in ginstall scoinst install; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
- if test $ac_prog = install &&
- grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
- # AIX install. It has an incompatible calling convention.
- :
- elif test $ac_prog = install &&
- grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
- # program-specific install script used by HP pwplus--don't use.
- :
- else
- ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
- break 3
- fi
- fi
- done
- done
- ;;
-esac
-done
-
-
-fi
- if test "${ac_cv_path_install+set}" = set; then
- INSTALL=$ac_cv_path_install
- else
- # As a last resort, use the slow shell script. We don't cache a
- # path for INSTALL within a source directory, because that will
- # break other packages using the cache if that directory is
- # removed, or if the path is relative.
- INSTALL=$ac_install_sh
- fi
-fi
-echo "$as_me:$LINENO: result: $INSTALL" >&5
-echo "${ECHO_T}$INSTALL" >&6
-
-# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
-# It thinks the first close brace ends the variable substitution.
-test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
-
-test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
-
-test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
-
-echo "$as_me:$LINENO: checking whether build environment is sane" >&5
-echo $ECHO_N "checking whether build environment is sane... $ECHO_C" >&6
-# Just in case
-sleep 1
-echo timestamp > conftest.file
-# Do `set' in a subshell so we don't clobber the current shell's
-# arguments. Must try -L first in case configure is actually a
-# symlink; some systems play weird games with the mod time of symlinks
-# (eg FreeBSD returns the mod time of the symlink's containing
-# directory).
-if (
- set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
- if test "$*" = "X"; then
- # -L didn't work.
- set X `ls -t $srcdir/configure conftest.file`
- fi
- rm -f conftest.file
- if test "$*" != "X $srcdir/configure conftest.file" \
- && test "$*" != "X conftest.file $srcdir/configure"; then
-
- # If neither matched, then we have a broken ls. This can happen
- # if, for instance, CONFIG_SHELL is bash and it inherits a
- # broken ls alias from the environment. This has actually
- # happened. Such a system could not be considered "sane".
- { { echo "$as_me:$LINENO: error: ls -t appears to fail. Make sure there is not a broken
-alias in your environment" >&5
-echo "$as_me: error: ls -t appears to fail. Make sure there is not a broken
-alias in your environment" >&2;}
- { (exit 1); exit 1; }; }
- fi
-
- test "$2" = conftest.file
- )
-then
- # Ok.
- :
-else
- { { echo "$as_me:$LINENO: error: newly created file is older than distributed files!
-Check your system clock" >&5
-echo "$as_me: error: newly created file is older than distributed files!
-Check your system clock" >&2;}
- { (exit 1); exit 1; }; }
-fi
-echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6
-test "$program_prefix" != NONE &&
- program_transform_name="s,^,$program_prefix,;$program_transform_name"
-# Use a double $ so make ignores it.
-test "$program_suffix" != NONE &&
- program_transform_name="s,\$,$program_suffix,;$program_transform_name"
-# Double any \ or $. echo might interpret backslashes.
-# By default was `s,x,x', remove it if useless.
-cat <<\_ACEOF >conftest.sed
-s/[\\$]/&&/g;s/;s,x,x,$//
-_ACEOF
-program_transform_name=`echo $program_transform_name | sed -f conftest.sed`
-rm conftest.sed
-
-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
-
-test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
-# Use eval to expand $SHELL
-if eval "$MISSING --run true"; then
- am_missing_run="$MISSING --run "
-else
- am_missing_run=
- { echo "$as_me:$LINENO: WARNING: \`missing' script is too old or missing" >&5
-echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;}
-fi
-
-if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
- # We used to keep the `.' as the first argument, in order to
- # allow $(mkdir_p) to be used without an argument. As in
- # $(mkdir_p) $(somedir)
- # where $(somedir) is conditionally defined. However this is wrong
- # for two reasons:
- # 1. if the package is installed by a user who cannot write `.'
- # make install will fail,
- # 2. the above comment should most certainly read
- # $(mkdir_p) $(DESTDIR)$(somedir)
- # so it does not work when $(somedir) is undefined and
- # $(DESTDIR) is not.
- # To support the latter case, we have to write
- # test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir),
- # so the `.' trick is pointless.
- mkdir_p='mkdir -p --'
-else
- # On NextStep and OpenStep, the `mkdir' command does not
- # recognize any option. It will interpret all options as
- # directories to create, and then abort because `.' already
- # exists.
- for d in ./-p ./--version;
- do
- test -d $d && rmdir $d
- done
- # $(mkinstalldirs) is defined by Automake if mkinstalldirs exists.
- if test -f "$ac_aux_dir/mkinstalldirs"; then
- mkdir_p='$(mkinstalldirs)'
- else
- mkdir_p='$(install_sh) -d'
- fi
-fi
-
-for ac_prog in gawk mawk nawk awk
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_AWK+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$AWK"; then
- ac_cv_prog_AWK="$AWK" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_AWK="$ac_prog"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-AWK=$ac_cv_prog_AWK
-if test -n "$AWK"; then
- echo "$as_me:$LINENO: result: $AWK" >&5
-echo "${ECHO_T}$AWK" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- test -n "$AWK" && break
-done
-
-echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5
-echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6
-set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y,:./+-,___p_,'`
-if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.make <<\_ACEOF
-all:
- @echo 'ac_maketemp="$(MAKE)"'
-_ACEOF
-# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
-eval `${MAKE-make} -f conftest.make 2>/dev/null | grep temp=`
-if test -n "$ac_maketemp"; then
- eval ac_cv_prog_make_${ac_make}_set=yes
-else
- eval ac_cv_prog_make_${ac_make}_set=no
-fi
-rm -f conftest.make
-fi
-if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
- echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6
- SET_MAKE=
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
- SET_MAKE="MAKE=${MAKE-make}"
-fi
-
-rm -rf .tst 2>/dev/null
-mkdir .tst 2>/dev/null
-if test -d .tst; then
- am__leading_dot=.
-else
- am__leading_dot=_
-fi
-rmdir .tst 2>/dev/null
-
-# test to see if srcdir already configured
-if test "`cd $srcdir && pwd`" != "`pwd`" &&
- test -f $srcdir/config.status; then
- { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5
-echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-# test whether we have cygpath
-if test -z "$CYGPATH_W"; then
- if (cygpath --version) >/dev/null 2>/dev/null; then
- CYGPATH_W='cygpath -w'
- else
- CYGPATH_W=echo
- fi
-fi
-
-
-# Define the identity of the package.
- PACKAGE=moses
- VERSION=0.1
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE "$PACKAGE"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define VERSION "$VERSION"
-_ACEOF
-
-# Some tools Automake needs.
-
-ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
-
-
-AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
-
-
-AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
-
-
-AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
-
-
-MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
-
-install_sh=${install_sh-"$am_aux_dir/install-sh"}
-
-# Installed binaries are usually stripped using `strip' when the user
- # runs `make install-strip'. However, `strip' might not be the right
-# tool to use in cross-compilation environments, therefore Automake
-# will honor the `STRIP' environment variable to overrule this program.
-if test "$cross_compiling" != no; then
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
-set dummy ${ac_tool_prefix}strip; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_STRIP+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$STRIP"; then
- ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_STRIP="${ac_tool_prefix}strip"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-STRIP=$ac_cv_prog_STRIP
-if test -n "$STRIP"; then
- echo "$as_me:$LINENO: result: $STRIP" >&5
-echo "${ECHO_T}$STRIP" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$ac_cv_prog_STRIP"; then
- ac_ct_STRIP=$STRIP
- # Extract the first word of "strip", so it can be a program name with args.
-set dummy strip; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_STRIP"; then
- ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_STRIP="strip"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
- test -z "$ac_cv_prog_ac_ct_STRIP" && ac_cv_prog_ac_ct_STRIP=":"
-fi
-fi
-ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
-if test -n "$ac_ct_STRIP"; then
- echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5
-echo "${ECHO_T}$ac_ct_STRIP" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- STRIP=$ac_ct_STRIP
-else
- STRIP="$ac_cv_prog_STRIP"
-fi
-
-fi
-INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s"
-
-# We need awk for the "check" target. The system "awk" is bad on
-# some platforms.
-# Always define AMTAR for backward compatibility.
-
-AMTAR=${AMTAR-"${am_missing_run}tar"}
-
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
-
-
-
-
-
-
-ac_ext=cc
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-if test -n "$ac_tool_prefix"; then
- for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_CXX+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$CXX"; then
- ac_cv_prog_CXX="$CXX" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-CXX=$ac_cv_prog_CXX
-if test -n "$CXX"; then
- echo "$as_me:$LINENO: result: $CXX" >&5
-echo "${ECHO_T}$CXX" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- test -n "$CXX" && break
- done
-fi
-if test -z "$CXX"; then
- ac_ct_CXX=$CXX
- for ac_prog in $CCC g++ c++ gpp aCC CC cxx cc++ cl FCC KCC RCC xlC_r xlC
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_CXX"; then
- ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CXX="$ac_prog"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
-if test -n "$ac_ct_CXX"; then
- echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5
-echo "${ECHO_T}$ac_ct_CXX" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- test -n "$ac_ct_CXX" && break
-done
-test -n "$ac_ct_CXX" || ac_ct_CXX="g++"
-
- CXX=$ac_ct_CXX
-fi
-
-
-# Provide some information about the compiler.
-echo "$as_me:$LINENO:" \
- "checking for C++ compiler version" >&5
-ac_compiler=`set X $ac_compile; echo $2`
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
- (eval $ac_compiler --version </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
- (eval $ac_compiler -v </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
- (eval $ac_compiler -V </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files a.out a.exe b.out"
-# Try to create an executable without -o first, disregard a.out.
- # It will help us diagnose broken compilers and get an intuition
- # of exeext.
-echo "$as_me:$LINENO: checking for C++ compiler default output file name" >&5
-echo $ECHO_N "checking for C++ compiler default output file name... $ECHO_C" >&6
-ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
-if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
- (eval $ac_link_default) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- # Find the output, starting from the most likely. This scheme is
-# not robust to junk in `.', hence go to wildcards (a.*) only as a last
-# resort.
-
-# Be careful to initialize this variable, since it used to be cached.
-# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
-ac_cv_exeext=
-# b.out is created by i960 compilers.
-for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
-do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
- ;;
- conftest.$ac_ext )
- # This is the source file.
- ;;
- [ab].out )
- # We found the default executable, but exeext='' is most
- # certainly right.
- break;;
- *.* )
- ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- # FIXME: I believe we export ac_cv_exeext for Libtool,
- # but it would be cool to find out if it's true. Does anybody
- # maintain Libtool? --akim.
- export ac_cv_exeext
- break;;
- * )
- break;;
- esac
-done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { echo "$as_me:$LINENO: error: C++ compiler cannot create executables
-See \`config.log' for more details." >&5
-echo "$as_me: error: C++ compiler cannot create executables
-See \`config.log' for more details." >&2;}
- { (exit 77); exit 77; }; }
-fi
-
-ac_exeext=$ac_cv_exeext
-echo "$as_me:$LINENO: result: $ac_file" >&5
-echo "${ECHO_T}$ac_file" >&6
-
-# Check the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-echo "$as_me:$LINENO: checking whether the C++ compiler works" >&5
-echo $ECHO_N "checking whether the C++ compiler works... $ECHO_C" >&6
-# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
-# If not cross compiling, check that we can run a simple program.
-if test "$cross_compiling" != yes; then
- if { ac_try='./$ac_file'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- cross_compiling=no
- else
- if test "$cross_compiling" = maybe; then
- cross_compiling=yes
- else
- { { echo "$as_me:$LINENO: error: cannot run C++ compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run C++ compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
- fi
- fi
-fi
-echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6
-
-rm -f a.out a.exe conftest$ac_cv_exeext b.out
-ac_clean_files=$ac_clean_files_save
-# Check the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
-echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
-echo "$as_me:$LINENO: result: $cross_compiling" >&5
-echo "${ECHO_T}$cross_compiling" >&6
-
-echo "$as_me:$LINENO: checking for suffix of executables" >&5
-echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- # If both `conftest.exe' and `conftest' are `present' (well, observable)
-# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
-# work properly (i.e., refer to `conftest.exe'), while it won't with
-# `rm'.
-for ac_file in conftest.exe conftest conftest.*; do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
- *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- export ac_cv_exeext
- break;;
- * ) break;;
- esac
-done
-else
- { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-rm -f conftest$ac_cv_exeext
-echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
-echo "${ECHO_T}$ac_cv_exeext" >&6
-
-rm -f conftest.$ac_ext
-EXEEXT=$ac_cv_exeext
-ac_exeext=$EXEEXT
-echo "$as_me:$LINENO: checking for suffix of object files" >&5
-echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
-if test "${ac_cv_objext+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.o conftest.obj
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
- *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
- break;;
- esac
-done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-rm -f conftest.$ac_cv_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
-echo "${ECHO_T}$ac_cv_objext" >&6
-OBJEXT=$ac_cv_objext
-ac_objext=$OBJEXT
-echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5
-echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6
-if test "${ac_cv_cxx_compiler_gnu+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-#ifndef __GNUC__
- choke me
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_compiler_gnu=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_compiler_gnu=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
-
-fi
-echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5
-echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6
-GXX=`test $ac_compiler_gnu = yes && echo yes`
-ac_test_CXXFLAGS=${CXXFLAGS+set}
-ac_save_CXXFLAGS=$CXXFLAGS
-CXXFLAGS="-g"
-echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5
-echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6
-if test "${ac_cv_prog_cxx_g+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_prog_cxx_g=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_prog_cxx_g=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5
-echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6
-if test "$ac_test_CXXFLAGS" = set; then
- CXXFLAGS=$ac_save_CXXFLAGS
-elif test $ac_cv_prog_cxx_g = yes; then
- if test "$GXX" = yes; then
- CXXFLAGS="-g -O2"
- else
- CXXFLAGS="-g"
- fi
-else
- if test "$GXX" = yes; then
- CXXFLAGS="-O2"
- else
- CXXFLAGS=
- fi
-fi
-for ac_declaration in \
- '' \
- 'extern "C" void std::exit (int) throw (); using std::exit;' \
- 'extern "C" void std::exit (int); using std::exit;' \
- 'extern "C" void exit (int) throw ();' \
- 'extern "C" void exit (int);' \
- 'void exit (int);'
-do
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_declaration
-#include <stdlib.h>
-int
-main ()
-{
-exit (42);
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-continue
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_declaration
-int
-main ()
-{
-exit (42);
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-done
-rm -f conftest*
-if test -n "$ac_declaration"; then
- echo '#ifdef __cplusplus' >>confdefs.h
- echo $ac_declaration >>confdefs.h
- echo '#endif' >>confdefs.h
-fi
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-DEPDIR="${am__leading_dot}deps"
-
- ac_config_commands="$ac_config_commands depfiles"
-
-
-am_make=${MAKE-make}
-cat > confinc << 'END'
-am__doit:
- @echo done
-.PHONY: am__doit
-END
-# If we don't find an include directive, just comment out the code.
-echo "$as_me:$LINENO: checking for style of include used by $am_make" >&5
-echo $ECHO_N "checking for style of include used by $am_make... $ECHO_C" >&6
-am__include="#"
-am__quote=
-_am_result=none
-# First try GNU make style include.
-echo "include confinc" > confmf
-# We grep out `Entering directory' and `Leaving directory'
-# messages which can occur if `w' ends up in MAKEFLAGS.
-# In particular we don't look at `^make:' because GNU make might
-# be invoked under some other name (usually "gmake"), in which
-# case it prints its new name instead of `make'.
-if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
- am__include=include
- am__quote=
- _am_result=GNU
-fi
-# Now try BSD make style include.
-if test "$am__include" = "#"; then
- echo '.include "confinc"' > confmf
- if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
- am__include=.include
- am__quote="\""
- _am_result=BSD
- fi
-fi
-
-
-echo "$as_me:$LINENO: result: $_am_result" >&5
-echo "${ECHO_T}$_am_result" >&6
-rm -f confinc confmf
-
-# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given.
-if test "${enable_dependency_tracking+set}" = set; then
- enableval="$enable_dependency_tracking"
-
-fi;
-if test "x$enable_dependency_tracking" != xno; then
- am_depcomp="$ac_aux_dir/depcomp"
- AMDEPBACKSLASH='\'
-fi
-
-
-if test "x$enable_dependency_tracking" != xno; then
- AMDEP_TRUE=
- AMDEP_FALSE='#'
-else
- AMDEP_TRUE='#'
- AMDEP_FALSE=
-fi
-
-
-
-
-depcc="$CXX" am_compiler_list=
-
-echo "$as_me:$LINENO: checking dependency style of $depcc" >&5
-echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6
-if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
- # We make a subdir and do the tests there. Otherwise we can end up
- # making bogus files that we don't know about and never remove. For
- # instance it was reported that on HP-UX the gcc test will end up
- # making a dummy file named `D' -- because `-MD' means `put the output
- # in D'.
- mkdir conftest.dir
- # Copy depcomp to subdir because otherwise we won't find it if we're
- # using a relative directory.
- cp "$am_depcomp" conftest.dir
- cd conftest.dir
- # We will build objects and dependencies in a subdirectory because
- # it helps to detect inapplicable dependency modes. For instance
- # both Tru64's cc and ICC support -MD to output dependencies as a
- # side effect of compilation, but ICC will put the dependencies in
- # the current directory while Tru64 will put them in the object
- # directory.
- mkdir sub
-
- am_cv_CXX_dependencies_compiler_type=none
- if test "$am_compiler_list" = ""; then
- am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
- fi
- for depmode in $am_compiler_list; do
- # Setup a source with many dependencies, because some compilers
- # like to wrap large dependency lists on column 80 (with \), and
- # we should not choose a depcomp mode which is confused by this.
- #
- # We need to recreate these files for each test, as the compiler may
- # overwrite some of them when testing with obscure command lines.
- # This happens at least with the AIX C compiler.
- : > sub/conftest.c
- for i in 1 2 3 4 5 6; do
- echo '#include "conftst'$i'.h"' >> sub/conftest.c
- # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
- # Solaris 8's {/usr,}/bin/sh.
- touch sub/conftst$i.h
- done
- echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
-
- case $depmode in
- nosideeffect)
- # after this tag, mechanisms are not by side-effect, so they'll
- # only be used when explicitly requested
- if test "x$enable_dependency_tracking" = xyes; then
- continue
- else
- break
- fi
- ;;
- none) break ;;
- esac
- # We check with `-c' and `-o' for the sake of the "dashmstdout"
- # mode. It turns out that the SunPro C++ compiler does not properly
- # handle `-M -o', and we need to detect this.
- if depmode=$depmode \
- source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
- depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
- $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
- >/dev/null 2>conftest.err &&
- grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
- grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
- ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
- # icc doesn't choke on unknown options, it will just issue warnings
- # or remarks (even with -Werror). So we grep stderr for any message
- # that says an option was ignored or not supported.
- # When given -MP, icc 7.0 and 7.1 complain thusly:
- # icc: Command line warning: ignoring option '-M'; no argument required
- # The diagnosis changed in icc 8.0:
- # icc: Command line remark: option '-MP' not supported
- if (grep 'ignoring option' conftest.err ||
- grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
- am_cv_CXX_dependencies_compiler_type=$depmode
- break
- fi
- fi
- done
-
- cd ..
- rm -rf conftest.dir
-else
- am_cv_CXX_dependencies_compiler_type=none
-fi
-
-fi
-echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5
-echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6
-CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type
-
-
-
-if
- test "x$enable_dependency_tracking" != xno \
- && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then
- am__fastdepCXX_TRUE=
- am__fastdepCXX_FALSE='#'
-else
- am__fastdepCXX_TRUE='#'
- am__fastdepCXX_FALSE=
-fi
-
-
-ac_ext=cc
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
-set dummy ${ac_tool_prefix}ranlib; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_RANLIB+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$RANLIB"; then
- ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-RANLIB=$ac_cv_prog_RANLIB
-if test -n "$RANLIB"; then
- echo "$as_me:$LINENO: result: $RANLIB" >&5
-echo "${ECHO_T}$RANLIB" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$ac_cv_prog_RANLIB"; then
- ac_ct_RANLIB=$RANLIB
- # Extract the first word of "ranlib", so it can be a program name with args.
-set dummy ranlib; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_RANLIB"; then
- ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_RANLIB="ranlib"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
- test -z "$ac_cv_prog_ac_ct_RANLIB" && ac_cv_prog_ac_ct_RANLIB=":"
-fi
-fi
-ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
-if test -n "$ac_ct_RANLIB"; then
- echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5
-echo "${ECHO_T}$ac_ct_RANLIB" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- RANLIB=$ac_ct_RANLIB
-else
- RANLIB="$ac_cv_prog_RANLIB"
-fi
-
-#AM_PROG_LIBTOOL
-
-
-# Check whether --with-srilm or --without-srilm was given.
-if test "${with_srilm+set}" = set; then
- withval="$with_srilm"
- with_srilm=$withval
-else
- with_srilm=no
-
-fi;
-
-
-# Check whether --with-boost or --without-boost was given.
-if test "${with_boost+set}" = set; then
- withval="$with_boost"
- with_boost=$withval
-else
- with_boost=no
-
-fi;
-
-
-# Check whether --with-irstlm or --without-irstlm was given.
-if test "${with_irstlm+set}" = set; then
- withval="$with_irstlm"
- with_irstlm=$withval
-else
- with_irstlm=no
-
-fi;
-
-
-# Check whether --with-moses or --without-moses was given.
-if test "${with_moses+set}" = set; then
- withval="$with_moses"
- with_moses=$withval
-else
- with_moses=no
-
-fi;
-
-# Check whether --enable-profiling or --disable-profiling was given.
-if test "${enable_profiling+set}" = set; then
- enableval="$enable_profiling"
- CPPFLAGS="$CPPFLAGS -pg"; LDFLAGS="$LDFLAGS -pg"
-
-fi;
-
-# Check whether --enable-optimization or --disable-optimization was given.
-if test "${enable_optimization+set}" = set; then
- enableval="$enable_optimization"
- CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3"
-
-fi;
-
-# Check whether --enable-mysql or --disable-mysql was given.
-if test "${enable_mysql+set}" = set; then
- enableval="$enable_mysql"
- mysql_flag=yes
-else
- mysql_flag=no
-fi;
-
-if test "x$with_boost" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_boost} -I${with_boost}/include"
- LDFLAGS="$LDFLAGS -L${with_boost}/lib -L${with_boost}/stage/lib"
-fi
-
-if test "x$with_moses" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_moses}/src"
- LDFLAGS="$LDFLAGS -L${with_moses}/src"
-fi
-LIBS="$LIBS -lmoses"
-
-
-ac_ext=cc
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-echo "$as_me:$LINENO: checking how to run the C++ preprocessor" >&5
-echo $ECHO_N "checking how to run the C++ preprocessor... $ECHO_C" >&6
-if test -z "$CXXCPP"; then
- if test "${ac_cv_prog_CXXCPP+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- # Double quotes because CXXCPP needs to be expanded
- for CXXCPP in "$CXX -E" "/lib/cpp"
- do
- ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether non-existent headers
- # can be detected and how.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- # Broken: success on invalid input.
-continue
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then
- break
-fi
-
- done
- ac_cv_prog_CXXCPP=$CXXCPP
-
-fi
- CXXCPP=$ac_cv_prog_CXXCPP
-else
- ac_cv_prog_CXXCPP=$CXXCPP
-fi
-echo "$as_me:$LINENO: result: $CXXCPP" >&5
-echo "${ECHO_T}$CXXCPP" >&6
-ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether non-existent headers
- # can be detected and how.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- # Broken: success on invalid input.
-continue
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then
- :
-else
- { { echo "$as_me:$LINENO: error: C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." >&5
-echo "$as_me: error: C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-ac_ext=cc
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-echo "$as_me:$LINENO: checking for egrep" >&5
-echo $ECHO_N "checking for egrep... $ECHO_C" >&6
-if test "${ac_cv_prog_egrep+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if echo a | (grep -E '(a|b)') >/dev/null 2>&1
- then ac_cv_prog_egrep='grep -E'
- else ac_cv_prog_egrep='egrep'
- fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5
-echo "${ECHO_T}$ac_cv_prog_egrep" >&6
- EGREP=$ac_cv_prog_egrep
-
-
-echo "$as_me:$LINENO: checking for ANSI C header files" >&5
-echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
-if test "${ac_cv_header_stdc+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_header_stdc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_header_stdc=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
- # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then
- :
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then
- :
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then
- :
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ctype.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
- (('a' <= (c) && (c) <= 'i') \
- || ('j' <= (c) && (c) <= 'r') \
- || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
- int i;
- for (i = 0; i < 256; i++)
- if (XOR (islower (i), ISLOWER (i))
- || toupper (i) != TOUPPER (i))
- exit(2);
- exit (0);
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- :
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-ac_cv_header_stdc=no
-fi
-rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
-echo "${ECHO_T}$ac_cv_header_stdc" >&6
-if test $ac_cv_header_stdc = yes; then
-
-cat >>confdefs.h <<\_ACEOF
-#define STDC_HEADERS 1
-_ACEOF
-
-fi
-
-# On IRIX 5.3, sys/types and inttypes.h are conflicting.
-
-
-
-
-
-
-
-
-
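-# Probe the common ANSI/POSIX headers and define HAVE_<HEADER> in confdefs.h
-# for each one that compiles.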
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
- inttypes.h stdint.h unistd.h
-do
-as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_header" >&5
-echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
-if eval "test \"\${$as_ac_Header+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-
-#include <$ac_header>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_Header=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_Header=no"
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
-if test `eval echo '${'$as_ac_Header'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
-if test "${ac_cv_header_Manager_h+set}" = set; then
- echo "$as_me:$LINENO: checking for Manager.h" >&5
-echo $ECHO_N "checking for Manager.h... $ECHO_C" >&6
-if test "${ac_cv_header_Manager_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_Manager_h" >&5
-echo "${ECHO_T}$ac_cv_header_Manager_h" >&6
-else
- # Is the header compilable?
-echo "$as_me:$LINENO: checking Manager.h usability" >&5
-echo $ECHO_N "checking Manager.h usability... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <Manager.h>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_header_compiler=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6
-
-# Is the header present?
-echo "$as_me:$LINENO: checking Manager.h presence" >&5
-echo $ECHO_N "checking Manager.h presence... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <Manager.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: Manager.h: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: Manager.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: Manager.h: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: Manager.h: present but cannot be compiled" >&5
-echo "$as_me: WARNING: Manager.h: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: Manager.h: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: Manager.h: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: Manager.h: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: Manager.h: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: Manager.h: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: Manager.h: in the future, the compiler will take precedence" >&2;}
- (
- cat <<\_ASBOX
-## ------------------------------------------ ##
-## Report this to the AC_PACKAGE_NAME lists. ##
-## ------------------------------------------ ##
-_ASBOX
- ) |
- sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-echo "$as_me:$LINENO: checking for Manager.h" >&5
-echo $ECHO_N "checking for Manager.h... $ECHO_C" >&6
-if test "${ac_cv_header_Manager_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_Manager_h=$ac_header_preproc
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_Manager_h" >&5
-echo "${ECHO_T}$ac_cv_header_Manager_h" >&6
-
-fi
-if test $ac_cv_header_Manager_h = yes; then
- :
-else
- { { echo "$as_me:$LINENO: error: Cannot find moses headers! Use --with-moses=PATH" >&5
-echo "$as_me: error: Cannot find moses headers! Use --with-moses=PATH" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-if test "${ac_cv_header_boost_algorithm_string_hpp+set}" = set; then
- echo "$as_me:$LINENO: checking for boost/algorithm/string.hpp" >&5
-echo $ECHO_N "checking for boost/algorithm/string.hpp... $ECHO_C" >&6
-if test "${ac_cv_header_boost_algorithm_string_hpp+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_boost_algorithm_string_hpp" >&5
-echo "${ECHO_T}$ac_cv_header_boost_algorithm_string_hpp" >&6
-else
- # Is the header compilable?
-echo "$as_me:$LINENO: checking boost/algorithm/string.hpp usability" >&5
-echo $ECHO_N "checking boost/algorithm/string.hpp usability... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <boost/algorithm/string.hpp>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_header_compiler=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6
-
-# Is the header present?
-echo "$as_me:$LINENO: checking boost/algorithm/string.hpp presence" >&5
-echo $ECHO_N "checking boost/algorithm/string.hpp presence... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <boost/algorithm/string.hpp>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: present but cannot be compiled" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/algorithm/string.hpp: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: boost/algorithm/string.hpp: in the future, the compiler will take precedence" >&2;}
- (
- cat <<\_ASBOX
-## ------------------------------------------ ##
-## Report this to the AC_PACKAGE_NAME lists. ##
-## ------------------------------------------ ##
-_ASBOX
- ) |
- sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-echo "$as_me:$LINENO: checking for boost/algorithm/string.hpp" >&5
-echo $ECHO_N "checking for boost/algorithm/string.hpp... $ECHO_C" >&6
-if test "${ac_cv_header_boost_algorithm_string_hpp+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_boost_algorithm_string_hpp=$ac_header_preproc
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_boost_algorithm_string_hpp" >&5
-echo "${ECHO_T}$ac_cv_header_boost_algorithm_string_hpp" >&6
-
-fi
-if test $ac_cv_header_boost_algorithm_string_hpp = yes; then
- :
-else
- { { echo "$as_me:$LINENO: error: Cannot find boost. Use --with-boost=PATH" >&5
-echo "$as_me: error: Cannot find boost. Use --with-boost=PATH" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-if test "${ac_cv_header_boost_iostreams_filtering_stream_hpp+set}" = set; then
- echo "$as_me:$LINENO: checking for boost/iostreams/filtering_stream.hpp" >&5
-echo $ECHO_N "checking for boost/iostreams/filtering_stream.hpp... $ECHO_C" >&6
-if test "${ac_cv_header_boost_iostreams_filtering_stream_hpp+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_boost_iostreams_filtering_stream_hpp" >&5
-echo "${ECHO_T}$ac_cv_header_boost_iostreams_filtering_stream_hpp" >&6
-else
- # Is the header compilable?
-echo "$as_me:$LINENO: checking boost/iostreams/filtering_stream.hpp usability" >&5
-echo $ECHO_N "checking boost/iostreams/filtering_stream.hpp usability... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <boost/iostreams/filtering_stream.hpp>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_header_compiler=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6
-
-# Is the header present?
-echo "$as_me:$LINENO: checking boost/iostreams/filtering_stream.hpp presence" >&5
-echo $ECHO_N "checking boost/iostreams/filtering_stream.hpp presence... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <boost/iostreams/filtering_stream.hpp>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: present but cannot be compiled" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: boost/iostreams/filtering_stream.hpp: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: boost/iostreams/filtering_stream.hpp: in the future, the compiler will take precedence" >&2;}
- (
- cat <<\_ASBOX
-## ------------------------------------------ ##
-## Report this to the AC_PACKAGE_NAME lists. ##
-## ------------------------------------------ ##
-_ASBOX
- ) |
- sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-echo "$as_me:$LINENO: checking for boost/iostreams/filtering_stream.hpp" >&5
-echo $ECHO_N "checking for boost/iostreams/filtering_stream.hpp... $ECHO_C" >&6
-if test "${ac_cv_header_boost_iostreams_filtering_stream_hpp+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_boost_iostreams_filtering_stream_hpp=$ac_header_preproc
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_boost_iostreams_filtering_stream_hpp" >&5
-echo "${ECHO_T}$ac_cv_header_boost_iostreams_filtering_stream_hpp" >&6
-
-fi
-if test $ac_cv_header_boost_iostreams_filtering_stream_hpp = yes; then
- :
-else
- { { echo "$as_me:$LINENO: error: Cannot find boost. Use --with-boost=PATH" >&5
-echo "$as_me: error: Cannot find boost. Use --with-boost=PATH" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-
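-# Optional MySQL support: locate the MySQL client library and headers first,
-# then the MySQL++ C++ wrapper, before enabling BUILD_MYSQL_SUPPORT.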
-if test "$mysql_flag" = 'yes'
-then
-
-
-# Check whether --with-mysqlclient-prefix or --without-mysqlclient-prefix was given.
-if test "${with_mysqlclient_prefix+set}" = set; then
- withval="$with_mysqlclient_prefix"
- mysqlclient_prefix="$withval"
-else
- mysqlclient_prefix=""
-fi;
-
-
-# Check whether --with-mysqlclient-include or --without-mysqlclient-include was given.
-if test "${with_mysqlclient_include+set}" = set; then
- withval="$with_mysqlclient_include"
- mysqlclient_include="$withval"
-else
- mysqlclient_include=""
-fi;
-
-
-# Check whether --with-mysqlclient-lib or --without-mysqlclient-lib was given.
-if test "${with_mysqlclient_lib+set}" = set; then
- withval="$with_mysqlclient_lib"
- mysqlclient_lib="$withval"
-else
- mysqlclient_lib=""
-fi;
-
- echo "$as_me:$LINENO: checking for mysqlclient " >&5
-echo $ECHO_N "checking for mysqlclient ... $ECHO_C" >&6
- MYSQLCLIENT_LDFLAGS=""
- MYSQLCLIENT_CPPFLAGS=""
- MYSQLCLIENT_LIBS="-lmysqlclient"
- mysqlclient_fail=""
-
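-  # Try a few common installation prefixes for libmysqlclient.so and mysql.h;
-  # explicit --with-mysqlclient-include/--with-mysqlclient-lib values given
-  # below override whatever is guessed here.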
- for tryprefix in /usr /usr/local /usr/mysql /usr/local/mysql /usr/pkg $mysqlclient_prefix; do
- #testloop
- for hloc in lib/mysql lib lib64/mysql lib64 ; do
- if test -e "$tryprefix/$hloc/libmysqlclient.so"; then
- MYSQLCLIENT_LDFLAGS="-L$tryprefix/$hloc"
- fi
- done
-
- for iloc in include/mysql include; do
- if test -e "$tryprefix/$iloc/mysql.h"; then
- MYSQLCLIENT_CPPFLAGS="-I$tryprefix/$iloc"
- fi
- done
- # testloop
- done
-
- if test "x$mysqlclient_include" != "x" ; then
- echo "checking for mysql includes... "
- if test -d "$mysqlclient_include/mysql" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include"
- echo " found $MYSQLCLIENT_CPPFLAGS"
- elif test -d "$mysqlclient_include/include/mysql" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include/include"
- echo " found $MYSQLCLIENT_CPPFLAGS"
- elif test -d "$mysqlclient_include" ; then
- MYSQLCLIENT_CPPFLAGS="-I$mysqlclient_include"
- echo "found $MYSQLCLIENT_CPPFLAGS"
- else
-      echo "not found! no include directory found in $mysqlclient_include"
- fi
- fi
-
- if test "x$mysqlclient_lib" != "x" ; then
-    echo "checking for mysql libs... "
- if test -d "$mysqlclient_lib/lib/mysql" ; then
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib/lib/mysql"
- echo "found $MYSQLCLIENT_LDFLAGS"
-    elif test -d "$mysqlclient_lib/lib" ; then
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib/lib"
- echo "found $MYSQLCLIENT_LDFLAGS"
- else
- MYSQLCLIENT_LDFLAGS="-L$mysqlclient_lib"
-      echo "defaulted to $MYSQLCLIENT_LDFLAGS"
- fi
- fi
-
- ac_save_CPPFLAGS="$CPPFLAGS"
- ac_save_LDFLAGS="$LDFLAGS"
- ac_save_LIBS="$LIBS"
- CPPFLAGS="$CPPFLAGS $MYSQLCLIENT_CPPFLAGS"
- LDFLAGS="$LDFLAGS $MYSQLCLIENT_LDFLAGS"
- LIBS="$LIBS $MYSQLCLIENT_LIBS"
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
- #include <mysql.h>
-
-int
-main ()
-{
-
- mysql_real_connect( 0, 0, 0, 0, 0, 0, 0, 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- echo "$as_me:$LINENO: result: yes $MYSQLCLIENT_CPPFLAGS $MYSQLCLIENT_LDFLAGS" >&5
-echo "${ECHO_T}yes $MYSQLCLIENT_CPPFLAGS $MYSQLCLIENT_LDFLAGS" >&6
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
- :
-
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-
-  echo "no"
-  echo "cannot compile a simple test program that calls mysql_real_connect()."
- mysqlclient_fail="yes"
-
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-
- if test "x$mysqlclient_fail" != "x" ; then
- echo
- echo "***"
-    echo "*** The mysqlclient test program failed to compile; check your config.log."
- echo "*** Also try one of the following switches :"
- echo "*** --with-mysqlclient-prefix=PFX"
- echo "*** --with-mysqlclient-include=DIR"
- echo "*** --with-mysqlclient-lib=DIR"
- echo "***"
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
-    { { echo "$as_me:$LINENO: error: Could not locate the MySQL client libraries. Try --with-mysqlclient-prefix/-include/-lib" >&5
-echo "$as_me: error: Could not locate the MySQL client libraries. Try --with-mysqlclient-prefix/-include/-lib" >&2;}
- { (exit 1); exit 1; }; }
- fi
-
- CPPFLAGS="$ac_save_CPPFLAGS"
- LDFLAGS="$ac_save_LDFLAGS"
- LIBS="$ac_save_LIBS"
-
-
-
-
- LDFLAGS="$LDFLAGS $MYSQLCLIENT_LDFLAGS"
- CPPFLAGS="$CPPFLAGS $MYSQLCLIENT_CPPFLAGS"
- LIBS="$LIBS $MYSQLCLIENT_LIBS"
-
-
-echo "$as_me:$LINENO: checking for MySQL++ libraries" >&5
-echo $ECHO_N "checking for MySQL++ libraries... $ECHO_C" >&6
-if test "${ac_cv_mysqlpp_devel+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-
- #
- # Set up configure script macros
- #
-
-# Check whether --with-mysqlpp or --without-mysqlpp was given.
-if test "${with_mysqlpp+set}" = set; then
- withval="$with_mysqlpp"
- MYSQLPP_lib_check="$with_mysqlpp/lib $with_mysqlpp/lib/mysql++"
- MYSQLPP_inc_check="$with_mysqlpp/include $with_mysqlpp/include/mysql++"
-else
- MYSQLPP_lib_check="/usr/local/mysql++/lib /usr/local/lib/mysql++ /opt/mysql++/lib /usr/lib/mysql++ /usr/local/lib /usr/lib"
- MYSQLPP_inc_check="/usr/local/mysql++/include /usr/local/include/mysql++ /opt/mysql++/include /usr/local/include/mysql++ /usr/local/include /usr/include/mysql++ /usr/include"
-fi;
-
-# Check whether --with-mysqlpp-lib or --without-mysqlpp-lib was given.
-if test "${with_mysqlpp_lib+set}" = set; then
- withval="$with_mysqlpp_lib"
- MYSQLPP_lib_check="$with_mysqlpp_lib $with_mysqlpp_lib/lib $with_mysqlpp_lib/lib/mysql"
-fi;
-
-# Check whether --with-mysqlpp-include or --without-mysqlpp-include was given.
-if test "${with_mysqlpp_include+set}" = set; then
- withval="$with_mysqlpp_include"
- MYSQLPP_inc_check="$with_mysqlpp_include $with_mysqlpp_include/include $with_mysqlpp_include/include/mysql"
-fi;
-
- #
- # Look for MySQL++ library
- #
- MYSQLPP_libdir=
- for dir in $MYSQLPP_lib_check
- do
- if test -d "$dir" && \
- ( test -f "$dir/libmysqlpp.so" ||
- test -f "$dir/libmysqlpp.a" )
- then
- MYSQLPP_libdir=$dir
- break
- fi
- done
-
- if test -z "$MYSQLPP_libdir"
- then
- { { echo "$as_me:$LINENO: error: Didn't find the MySQL++ library dir in '$MYSQLPP_lib_check'" >&5
-echo "$as_me: error: Didn't find the MySQL++ library dir in '$MYSQLPP_lib_check'" >&2;}
- { (exit 1); exit 1; }; }
- fi
-
- case "$MYSQLPP_libdir" in
- /* ) ;;
- * ) { { echo "$as_me:$LINENO: error: The MySQL++ library directory ($MYSQLPP_libdir) must be an absolute path." >&5
-echo "$as_me: error: The MySQL++ library directory ($MYSQLPP_libdir) must be an absolute path." >&2;}
- { (exit 1); exit 1; }; } ;;
- esac
-
- echo "$as_me:$LINENO: result: lib in $MYSQLPP_libdir" >&5
-echo "${ECHO_T}lib in $MYSQLPP_libdir" >&6
-
- case "$MYSQLPP_libdir" in
- /usr/lib) ;;
- *) LDFLAGS="$LDFLAGS -L${MYSQLPP_libdir} -Wl,-rpath ${MYSQLPP_libdir}" ;;
- esac
-
- #
- # Look for MySQL++ headers
- #
- echo "$as_me:$LINENO: checking for MySQL++ include directory" >&5
-echo $ECHO_N "checking for MySQL++ include directory... $ECHO_C" >&6
- MYSQLPP_incdir=
- for dir in $MYSQLPP_inc_check
- do
- if test -d "$dir" && test -f "$dir/mysql++.h"
- then
- MYSQLPP_incdir=$dir
- break
- fi
- done
-
- if test -z "$MYSQLPP_incdir"
- then
- { { echo "$as_me:$LINENO: error: Didn't find the MySQL++ header dir in '$MYSQLPP_inc_check'" >&5
-echo "$as_me: error: Didn't find the MySQL++ header dir in '$MYSQLPP_inc_check'" >&2;}
- { (exit 1); exit 1; }; }
- fi
-
- case "$MYSQLPP_incdir" in
- /* ) ;;
- * ) { { echo "$as_me:$LINENO: error: The MySQL++ header directory ($MYSQLPP_incdir) must be an absolute path." >&5
-echo "$as_me: error: The MySQL++ header directory ($MYSQLPP_incdir) must be an absolute path." >&2;}
- { (exit 1); exit 1; }; } ;;
- esac
-
- echo "$as_me:$LINENO: result: $MYSQLPP_incdir" >&5
-echo "${ECHO_T}$MYSQLPP_incdir" >&6
-
- CPPFLAGS="$CPPFLAGS -I${MYSQLPP_incdir}"
-
- echo "$as_me:$LINENO: checking that we can build MySQL++ programs" >&5
-echo $ECHO_N "checking that we can build MySQL++ programs... $ECHO_C" >&6
- cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <mysql++.h>
-int
-main ()
-{
-std::string s; mysqlpp::escape_string(s)
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_mysqlpp_devel=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { echo "$as_me:$LINENO: error: no" >&5
-echo "$as_me: error: no" >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-
-fi
-echo "$as_me:$LINENO: result: $ac_cv_mysqlpp_devel" >&5
-echo "${ECHO_T}$ac_cv_mysqlpp_devel" >&6
- LIBS="$LIBS -lmysqlpp"
-
-
-
-if true; then
- BUILD_MYSQL_SUPPORT_TRUE=
- BUILD_MYSQL_SUPPORT_FALSE='#'
-else
- BUILD_MYSQL_SUPPORT_TRUE='#'
- BUILD_MYSQL_SUPPORT_FALSE=
-fi
-
-else
-
-
-if false; then
- BUILD_MYSQL_SUPPORT_TRUE=
- BUILD_MYSQL_SUPPORT_FALSE='#'
-else
- BUILD_MYSQL_SUPPORT_TRUE='#'
- BUILD_MYSQL_SUPPORT_FALSE=
-fi
-
-fi
-
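-# Language model selection: SRILM when --with-srilm is given, otherwise IRSTLM
-# when --with-irstlm is given, otherwise the internal language model.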
-if test "x$with_srilm" != 'xno'
-then
- SAVE_CPPFLAGS="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
-
- if test "${ac_cv_header_Ngram_h+set}" = set; then
- echo "$as_me:$LINENO: checking for Ngram.h" >&5
-echo $ECHO_N "checking for Ngram.h... $ECHO_C" >&6
-if test "${ac_cv_header_Ngram_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_Ngram_h" >&5
-echo "${ECHO_T}$ac_cv_header_Ngram_h" >&6
-else
- # Is the header compilable?
-echo "$as_me:$LINENO: checking Ngram.h usability" >&5
-echo $ECHO_N "checking Ngram.h usability... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <Ngram.h>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_header_compiler=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6
-
-# Is the header present?
-echo "$as_me:$LINENO: checking Ngram.h presence" >&5
-echo $ECHO_N "checking Ngram.h presence... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <Ngram.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: Ngram.h: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: Ngram.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: Ngram.h: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: Ngram.h: present but cannot be compiled" >&5
-echo "$as_me: WARNING: Ngram.h: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: Ngram.h: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: Ngram.h: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: Ngram.h: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: Ngram.h: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: Ngram.h: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: Ngram.h: in the future, the compiler will take precedence" >&2;}
- (
- cat <<\_ASBOX
-## ------------------------------------------ ##
-## Report this to the AC_PACKAGE_NAME lists. ##
-## ------------------------------------------ ##
-_ASBOX
- ) |
- sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-echo "$as_me:$LINENO: checking for Ngram.h" >&5
-echo $ECHO_N "checking for Ngram.h... $ECHO_C" >&6
-if test "${ac_cv_header_Ngram_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_Ngram_h=$ac_header_preproc
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_Ngram_h" >&5
-echo "${ECHO_T}$ac_cv_header_Ngram_h" >&6
-
-fi
-if test $ac_cv_header_Ngram_h = yes; then
-
-cat >>confdefs.h <<\_ACEOF
-#define HAVE_SRILM
-_ACEOF
-
-else
- { { echo "$as_me:$LINENO: error: Cannot find SRILM!" >&5
-echo "$as_me: error: Cannot find SRILM!" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-
- LIB_SRILM="-loolm -ldstruct -lmisc"
- # ROOT/lib/i686-m64/liboolm.a
- # ROOT/lib/i686-m64/libdstruct.a
- # ROOT/lib/i686-m64/libmisc.a
- MY_ARCH=`${with_srilm}/sbin/machine-type`
- LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
- LIBS="$LIBS $LIB_SRILM"
- FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
-
-
-if true; then
- SRI_LM_TRUE=
- SRI_LM_FALSE='#'
-else
- SRI_LM_TRUE='#'
- SRI_LM_FALSE=
-fi
-
-
-
-if false; then
- INTERNAL_LM_TRUE=
- INTERNAL_LM_FALSE='#'
-else
- INTERNAL_LM_TRUE='#'
- INTERNAL_LM_FALSE=
-fi
-
-
-
-if false; then
- IRST_LM_TRUE=
- IRST_LM_FALSE='#'
-else
- IRST_LM_TRUE='#'
- IRST_LM_FALSE=
-fi
-
-else if test "x$with_irstlm" != 'xno'
-then
- SAVE_CPPFLAGS="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
-
- if test "${ac_cv_header_ngram_h+set}" = set; then
- echo "$as_me:$LINENO: checking for ngram.h" >&5
-echo $ECHO_N "checking for ngram.h... $ECHO_C" >&6
-if test "${ac_cv_header_ngram_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_ngram_h" >&5
-echo "${ECHO_T}$ac_cv_header_ngram_h" >&6
-else
- # Is the header compilable?
-echo "$as_me:$LINENO: checking ngram.h usability" >&5
-echo $ECHO_N "checking ngram.h usability... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <ngram.h>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_cxx_werror_flag"
- || test ! -s conftest.err'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_header_compiler=no
-fi
-rm -f conftest.err conftest.$ac_objext conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6
-
-# Is the header present?
-echo "$as_me:$LINENO: checking ngram.h presence" >&5
-echo $ECHO_N "checking ngram.h presence... $ECHO_C" >&6
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ngram.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_cxx_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: ngram.h: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: ngram.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: ngram.h: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: ngram.h: present but cannot be compiled" >&5
-echo "$as_me: WARNING: ngram.h: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: ngram.h: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: ngram.h: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: ngram.h: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: ngram.h: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: ngram.h: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: ngram.h: in the future, the compiler will take precedence" >&2;}
- (
- cat <<\_ASBOX
-## ------------------------------------------ ##
-## Report this to the AC_PACKAGE_NAME lists. ##
-## ------------------------------------------ ##
-_ASBOX
- ) |
- sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-echo "$as_me:$LINENO: checking for ngram.h" >&5
-echo $ECHO_N "checking for ngram.h... $ECHO_C" >&6
-if test "${ac_cv_header_ngram_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_ngram_h=$ac_header_preproc
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_ngram_h" >&5
-echo "${ECHO_T}$ac_cv_header_ngram_h" >&6
-
-fi
-if test $ac_cv_header_ngram_h = yes; then
-
-cat >>confdefs.h <<\_ACEOF
-#define HAVE_IRSTLM
-_ACEOF
-
-else
- { { echo "$as_me:$LINENO: error: Cannot find IRST-LM!" >&5
-echo "$as_me: error: Cannot find IRST-LM!" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-
- LIB_SRILM="-lirstlm"
- LDFLAGS="$LDFLAGS -L${with_irstlm}/lib"
- LIBS="$LIBS $LIB_SRILM"
- FMTLIBS="$FMTLIBS libirstlm.a"
-
-
-if false; then
- SRI_LM_TRUE=
- SRI_LM_FALSE='#'
-else
- SRI_LM_TRUE='#'
- SRI_LM_FALSE=
-fi
-
-
-
-if false; then
- INTERNAL_LM_TRUE=
- INTERNAL_LM_FALSE='#'
-else
- INTERNAL_LM_TRUE='#'
- INTERNAL_LM_FALSE=
-fi
-
-
-
-if true; then
- IRST_LM_TRUE=
- IRST_LM_FALSE='#'
-else
- IRST_LM_TRUE='#'
- IRST_LM_FALSE=
-fi
-
-else
- echo "Using internal language model (use --with-srilm or --with-irstlm to change)!"
-
-
-if false; then
- SRI_LM_TRUE=
- SRI_LM_FALSE='#'
-else
- SRI_LM_TRUE='#'
- SRI_LM_FALSE=
-fi
-
-
-
-if true; then
- INTERNAL_LM_TRUE=
- INTERNAL_LM_FALSE='#'
-else
- INTERNAL_LM_TRUE='#'
- INTERNAL_LM_FALSE=
-fi
-
-
-
-if false; then
- IRST_LM_TRUE=
- IRST_LM_FALSE='#'
-else
- IRST_LM_TRUE='#'
- IRST_LM_FALSE=
-fi
-
-fi
-fi
-
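-# Always link against the Boost iostreams, filesystem and thread libraries plus zlib.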
-LIBS="$LIBS -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz"
-FMT_LIBS="$FMT_LIBS libboost_iostreams.a libboost_filesystem.a libboost_thread.a"
-
-
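-# config.status will instantiate these Makefiles from their *.in templates.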
- ac_config_files="$ac_config_files Makefile src/Makefile"
-cat >confcache <<\_ACEOF
-# This file is a shell script that caches the results of configure
-# tests run on this system so they can be shared between configure
-# scripts and configure runs, see configure's option --config-cache.
-# It is not useful on other systems. If it contains results you don't
-# want to keep, you may remove or edit it.
-#
-# config.status only pays attention to the cache file if you give it
-# the --recheck option to rerun configure.
-#
-# `ac_cv_env_foo' variables (set or unset) will be overridden when
-# loading this file, other *unset* `ac_cv_foo' will be assigned the
-# following values.
-
-_ACEOF
-
-# The following way of writing the cache mishandles newlines in values,
-# but we know of no workaround that is simple, portable, and efficient.
-# So, don't put newlines in cache variables' values.
-# Ultrix sh set writes to stderr and can't be redirected directly,
-# and sets the high bit in the cache file unless we assign to the vars.
-{
- (set) 2>&1 |
- case `(ac_space=' '; set | grep ac_space) 2>&1` in
- *ac_space=\ *)
- # `set' does not quote correctly, so add quotes (double-quote
- # substitution turns \\\\ into \\, and sed turns \\ into \).
- sed -n \
- "s/'/'\\\\''/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
- ;;
- *)
- # `set' quotes correctly as required by POSIX, so do not add quotes.
- sed -n \
- "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
- ;;
- esac;
-} |
- sed '
- t clear
- : clear
- s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
- t end
- /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
- : end' >>confcache
-if diff $cache_file confcache >/dev/null 2>&1; then :; else
- if test -w $cache_file; then
- test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
- cat confcache >$cache_file
- else
- echo "not updating unwritable cache $cache_file"
- fi
-fi
-rm -f confcache
-
-test "x$prefix" = xNONE && prefix=$ac_default_prefix
-# Let make expand exec_prefix.
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
-
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
-# trailing colons and then remove the whole line if VPATH becomes empty
-# (actually we leave an empty line to preserve line numbers).
-if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=/{
-s/:*\$(srcdir):*/:/;
-s/:*\${srcdir}:*/:/;
-s/:*@srcdir@:*/:/;
-s/^\([^=]*=[ ]*\):*/\1/;
-s/:*$//;
-s/^[^=]*=[ ]*$//;
-}'
-fi
-
-DEFS=-DHAVE_CONFIG_H
-
-ac_libobjs=
-ac_ltlibobjs=
-for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
- # 1. Remove the extension, and $U if already installed.
- ac_i=`echo "$ac_i" |
- sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
- # 2. Add them.
- ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
- ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
-done
-LIBOBJS=$ac_libobjs
-
-LTLIBOBJS=$ac_ltlibobjs
-
-
-if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"AMDEP\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${BUILD_MYSQL_SUPPORT_TRUE}" && test -z "${BUILD_MYSQL_SUPPORT_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"BUILD_MYSQL_SUPPORT\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"BUILD_MYSQL_SUPPORT\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${BUILD_MYSQL_SUPPORT_TRUE}" && test -z "${BUILD_MYSQL_SUPPORT_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"BUILD_MYSQL_SUPPORT\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"BUILD_MYSQL_SUPPORT\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${SRI_LM_TRUE}" && test -z "${SRI_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${INTERNAL_LM_TRUE}" && test -z "${INTERNAL_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${IRST_LM_TRUE}" && test -z "${IRST_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${SRI_LM_TRUE}" && test -z "${SRI_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${INTERNAL_LM_TRUE}" && test -z "${INTERNAL_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${IRST_LM_TRUE}" && test -z "${IRST_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${SRI_LM_TRUE}" && test -z "${SRI_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"SRI_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${INTERNAL_LM_TRUE}" && test -z "${INTERNAL_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"INTERNAL_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-if test -z "${IRST_LM_TRUE}" && test -z "${IRST_LM_FALSE}"; then
- { { echo "$as_me:$LINENO: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&5
-echo "$as_me: error: conditional \"IRST_LM\" was never defined.
-Usually this means the macro was only invoked conditionally." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-: ${CONFIG_STATUS=./config.status}
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files $CONFIG_STATUS"
-{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
-echo "$as_me: creating $CONFIG_STATUS" >&6;}
-cat >$CONFIG_STATUS <<_ACEOF
-#! $SHELL
-# Generated by $as_me.
-# Run this file to recreate the current configuration.
-# Compiler output produced by configure, useful for debugging
-# configure, is in config.log if it exists.
-
-debug=false
-ac_cs_recheck=false
-ac_cs_silent=false
-SHELL=\${CONFIG_SHELL-$SHELL}
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-## --------------------- ##
-## M4sh Initialization. ##
-## --------------------- ##
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
-  # Zsh 3.x and 4.x perform word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
- set -o posix
-fi
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# Support unset when possible.
-if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
- as_unset=unset
-else
- as_unset=false
-fi
-
-
-# Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-for as_var in \
- LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
- LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
- LC_TELEPHONE LC_TIME
-do
- if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
- eval $as_var=C; export $as_var
- else
- $as_unset $as_var
- fi
-done
-
-# Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-
-# Name of the executable.
-as_me=`$as_basename "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)$' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
- /^X\/\(\/\/\)$/{ s//\1/; q; }
- /^X\/\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
-
-
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- echo "#! /bin/sh" >conf$$.sh
- echo "exit 0" >>conf$$.sh
- chmod +x conf$$.sh
- if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
- PATH_SEPARATOR=';'
- else
- PATH_SEPARATOR=:
- fi
- rm -f conf$$.sh
-fi
-
-
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" || {
- # Find who we are. Look in the path if we contain no path at all
- # relative or not.
- case $0 in
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
-
- ;;
- esac
- # We did not find ourselves, most probably we were run as `sh COMMAND'
- # in which case we are not to be found in the path.
- if test "x$as_myself" = x; then
- as_myself=$0
- fi
- if test ! -f "$as_myself"; then
- { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
-echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
- { (exit 1); exit 1; }; }
- fi
- case $CONFIG_SHELL in
- '')
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for as_base in sh bash ksh sh5; do
- case $as_dir in
- /*)
- if ("$as_dir/$as_base" -c '
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
- $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
- $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
- CONFIG_SHELL=$as_dir/$as_base
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$0" ${1+"$@"}
- fi;;
- esac
- done
-done
-;;
- esac
-
- # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
- # uniformly replaced by the line number. The first 'sed' inserts a
- # line-number line before each line; the second 'sed' does the real
- # work. The second script uses 'N' to pair each line-number line
- # with the numbered line, and appends trailing '-' during
- # substitution so that $LINENO is not a special case at line end.
- # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
- # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
- sed '=' <$as_myself |
- sed '
- N
- s,$,-,
- : loop
- s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
- t loop
- s,-$,,
- s,^['$as_cr_digits']*\n,,
- ' >$as_me.lineno &&
- chmod +x $as_me.lineno ||
- { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
-echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
- { (exit 1); exit 1; }; }
-
-  # Don't try to exec as it changes $[0], causing all sorts of problems
-  # (the dirname of $[0] is not the place where we might find the
-  # original, and so on.  Autoconf is especially sensitive to this).
- . ./$as_me.lineno
- # Exit status is that of the last command.
- exit
-}
-
-
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
- *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T=' ' ;;
- *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
- *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
-esac
-
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-rm -f conf$$ conf$$.exe conf$$.file
-echo >conf$$.file
-if ln -s conf$$.file conf$$ 2>/dev/null; then
- # We could just check for DJGPP; but this test a) works b) is more generic
- # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
- if test -f conf$$.exe; then
- # Don't use ln at all; we don't have any links
- as_ln_s='cp -p'
- else
- as_ln_s='ln -s'
- fi
-elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
-else
- as_ln_s='cp -p'
-fi
-rm -f conf$$ conf$$.exe conf$$.file
-
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p=:
-else
- test -d ./-p && rmdir ./-p
- as_mkdir_p=false
-fi
-
-as_executable_p="test -f"
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-
-
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" $as_nl"
-
-# CDPATH.
-$as_unset CDPATH
-
-exec 6>&1
-
-# Open the log real soon, to keep \$[0] and so on meaningful, and to
-# report actual input values of CONFIG_FILES etc. instead of their
-# values after options handling. Logging --version etc. is OK.
-exec 5>>config.log
-{
- echo
- sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
-## Running $as_me. ##
-_ASBOX
-} >&5
-cat >&5 <<_CSEOF
-
-This file was extended by $as_me, which was
-generated by GNU Autoconf 2.59. Invocation command line was
-
- CONFIG_FILES = $CONFIG_FILES
- CONFIG_HEADERS = $CONFIG_HEADERS
- CONFIG_LINKS = $CONFIG_LINKS
- CONFIG_COMMANDS = $CONFIG_COMMANDS
- $ $0 $@
-
-_CSEOF
-echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
-echo >&5
-_ACEOF
-
-# Files that config.status was made for.
-if test -n "$ac_config_files"; then
- echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_headers"; then
- echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_links"; then
- echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_commands"; then
- echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
-fi
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-ac_cs_usage="\
-\`$as_me' instantiates files from templates according to the
-current configuration.
-
-Usage: $0 [OPTIONS] [FILE]...
-
- -h, --help print this help, then exit
- -V, --version print version number, then exit
- -q, --quiet do not print progress messages
- -d, --debug don't remove temporary files
- --recheck update $as_me by reconfiguring in the same conditions
- --file=FILE[:TEMPLATE]
- instantiate the configuration file FILE
- --header=FILE[:TEMPLATE]
- instantiate the configuration header FILE
-
-Configuration files:
-$config_files
-
-Configuration headers:
-$config_headers
-
-Configuration commands:
-$config_commands
-
-Report bugs to <bug-autoconf@gnu.org>."
-_ACEOF
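
For orientation, a few hypothetical ways of driving the resulting config.status that match the usage text above (the file arguments are examples only):

./config.status                      # regenerate every configured file and header
./config.status config.h             # regenerate only the configuration header
./config.status --file=src/Makefile  # regenerate a single Makefile
./config.status --recheck            # rerun configure with the original options
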
-
-cat >>$CONFIG_STATUS <<_ACEOF
-ac_cs_version="\\
-config.status
-configured by $0, generated by GNU Autoconf 2.59,
- with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
-
-Copyright (C) 2003 Free Software Foundation, Inc.
-This config.status script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it."
-srcdir=$srcdir
-INSTALL="$INSTALL"
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-# If no files are specified by the user, then we need to provide default
-# values.  But we need to know whether files were specified by the user.
-ac_need_defaults=:
-while test $# != 0
-do
- case $1 in
- --*=*)
- ac_option=`expr "x$1" : 'x\([^=]*\)='`
- ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
- ac_shift=:
- ;;
- -*)
- ac_option=$1
- ac_optarg=$2
- ac_shift=shift
- ;;
- *) # This is not an option, so the user has probably given explicit
- # arguments.
- ac_option=$1
- ac_need_defaults=false;;
- esac
-
- case $ac_option in
- # Handling of the options.
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
- -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
- ac_cs_recheck=: ;;
- --version | --vers* | -V )
- echo "$ac_cs_version"; exit 0 ;;
- --he | --h)
- # Conflict between --help and --header
- { { echo "$as_me:$LINENO: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&2;}
- { (exit 1); exit 1; }; };;
- --help | --hel | -h )
- echo "$ac_cs_usage"; exit 0 ;;
- --debug | --d* | -d )
- debug=: ;;
- --file | --fil | --fi | --f )
- $ac_shift
- CONFIG_FILES="$CONFIG_FILES $ac_optarg"
- ac_need_defaults=false;;
- --header | --heade | --head | --hea )
- $ac_shift
- CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
- ac_need_defaults=false;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil | --si | --s)
- ac_cs_silent=: ;;
-
- # This is an error.
- -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&2;}
- { (exit 1); exit 1; }; } ;;
-
- *) ac_config_targets="$ac_config_targets $1" ;;
-
- esac
- shift
-done
-
-ac_configure_extra_args=
-
-if $ac_cs_silent; then
- exec 6>/dev/null
- ac_configure_extra_args="$ac_configure_extra_args --silent"
-fi
-
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF
-if \$ac_cs_recheck; then
- echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
- exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
-fi
-
-_ACEOF
-
-cat >>$CONFIG_STATUS <<_ACEOF
-#
-# INIT-COMMANDS section.
-#
-
-AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"
-
-_ACEOF
-
-
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-for ac_config_target in $ac_config_targets
-do
- case "$ac_config_target" in
- # Handling of arguments.
- "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
- "src/Makefile" ) CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
- "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
- "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
- *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
-echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# If the user did not use the arguments to specify the items to instantiate,
-# then the envvar interface is used. Set only those that are not.
-# We use the long form for the default assignment because of an extremely
-# bizarre bug on SunOS 4.1.3.
-if $ac_need_defaults; then
- test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
- test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
- test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
-fi
-
-# Have a temporary directory for convenience. Make it in the build tree
-# simply because there is no reason against having it here, and in addition,
-# creating and moving files from /tmp can sometimes cause problems.
-# Create a temporary directory, and hook for its removal unless debugging.
-$debug ||
-{
- trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
- trap '{ (exit 1); exit 1; }' 1 2 13 15
-}
-
-# Create a (secure) tmp directory for tmp files.
-
-{
- tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
- test -n "$tmp" && test -d "$tmp"
-} ||
-{
- tmp=./confstat$$-$RANDOM
- (umask 077 && mkdir $tmp)
-} ||
-{
- echo "$me: cannot create a temporary directory in ." >&2
- { (exit 1); exit 1; }
-}
-
-_ACEOF
-
-cat >>$CONFIG_STATUS <<_ACEOF
-
-#
-# CONFIG_FILES section.
-#
-
-# No need to generate the scripts if there are no CONFIG_FILES.
-# This happens for instance when ./config.status config.h
-if test -n "\$CONFIG_FILES"; then
- # Protect against being on the right side of a sed subst in config.status.
- sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
- s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
-s,@SHELL@,$SHELL,;t t
-s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
-s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
-s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
-s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
-s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
-s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
-s,@exec_prefix@,$exec_prefix,;t t
-s,@prefix@,$prefix,;t t
-s,@program_transform_name@,$program_transform_name,;t t
-s,@bindir@,$bindir,;t t
-s,@sbindir@,$sbindir,;t t
-s,@libexecdir@,$libexecdir,;t t
-s,@datadir@,$datadir,;t t
-s,@sysconfdir@,$sysconfdir,;t t
-s,@sharedstatedir@,$sharedstatedir,;t t
-s,@localstatedir@,$localstatedir,;t t
-s,@libdir@,$libdir,;t t
-s,@includedir@,$includedir,;t t
-s,@oldincludedir@,$oldincludedir,;t t
-s,@infodir@,$infodir,;t t
-s,@mandir@,$mandir,;t t
-s,@build_alias@,$build_alias,;t t
-s,@host_alias@,$host_alias,;t t
-s,@target_alias@,$target_alias,;t t
-s,@DEFS@,$DEFS,;t t
-s,@ECHO_C@,$ECHO_C,;t t
-s,@ECHO_N@,$ECHO_N,;t t
-s,@ECHO_T@,$ECHO_T,;t t
-s,@LIBS@,$LIBS,;t t
-s,@INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t
-s,@INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t
-s,@INSTALL_DATA@,$INSTALL_DATA,;t t
-s,@CYGPATH_W@,$CYGPATH_W,;t t
-s,@PACKAGE@,$PACKAGE,;t t
-s,@VERSION@,$VERSION,;t t
-s,@ACLOCAL@,$ACLOCAL,;t t
-s,@AUTOCONF@,$AUTOCONF,;t t
-s,@AUTOMAKE@,$AUTOMAKE,;t t
-s,@AUTOHEADER@,$AUTOHEADER,;t t
-s,@MAKEINFO@,$MAKEINFO,;t t
-s,@install_sh@,$install_sh,;t t
-s,@STRIP@,$STRIP,;t t
-s,@ac_ct_STRIP@,$ac_ct_STRIP,;t t
-s,@INSTALL_STRIP_PROGRAM@,$INSTALL_STRIP_PROGRAM,;t t
-s,@mkdir_p@,$mkdir_p,;t t
-s,@AWK@,$AWK,;t t
-s,@SET_MAKE@,$SET_MAKE,;t t
-s,@am__leading_dot@,$am__leading_dot,;t t
-s,@AMTAR@,$AMTAR,;t t
-s,@am__tar@,$am__tar,;t t
-s,@am__untar@,$am__untar,;t t
-s,@CXX@,$CXX,;t t
-s,@CXXFLAGS@,$CXXFLAGS,;t t
-s,@LDFLAGS@,$LDFLAGS,;t t
-s,@CPPFLAGS@,$CPPFLAGS,;t t
-s,@ac_ct_CXX@,$ac_ct_CXX,;t t
-s,@EXEEXT@,$EXEEXT,;t t
-s,@OBJEXT@,$OBJEXT,;t t
-s,@DEPDIR@,$DEPDIR,;t t
-s,@am__include@,$am__include,;t t
-s,@am__quote@,$am__quote,;t t
-s,@AMDEP_TRUE@,$AMDEP_TRUE,;t t
-s,@AMDEP_FALSE@,$AMDEP_FALSE,;t t
-s,@AMDEPBACKSLASH@,$AMDEPBACKSLASH,;t t
-s,@CXXDEPMODE@,$CXXDEPMODE,;t t
-s,@am__fastdepCXX_TRUE@,$am__fastdepCXX_TRUE,;t t
-s,@am__fastdepCXX_FALSE@,$am__fastdepCXX_FALSE,;t t
-s,@RANLIB@,$RANLIB,;t t
-s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t
-s,@CXXCPP@,$CXXCPP,;t t
-s,@EGREP@,$EGREP,;t t
-s,@MYSQLCLIENT_LDFLAGS@,$MYSQLCLIENT_LDFLAGS,;t t
-s,@MYSQLCLIENT_CPPFLAGS@,$MYSQLCLIENT_CPPFLAGS,;t t
-s,@MYSQLCLIENT_LIBS@,$MYSQLCLIENT_LIBS,;t t
-s,@BUILD_MYSQL_SUPPORT_TRUE@,$BUILD_MYSQL_SUPPORT_TRUE,;t t
-s,@BUILD_MYSQL_SUPPORT_FALSE@,$BUILD_MYSQL_SUPPORT_FALSE,;t t
-s,@SRI_LM_TRUE@,$SRI_LM_TRUE,;t t
-s,@SRI_LM_FALSE@,$SRI_LM_FALSE,;t t
-s,@INTERNAL_LM_TRUE@,$INTERNAL_LM_TRUE,;t t
-s,@INTERNAL_LM_FALSE@,$INTERNAL_LM_FALSE,;t t
-s,@IRST_LM_TRUE@,$IRST_LM_TRUE,;t t
-s,@IRST_LM_FALSE@,$IRST_LM_FALSE,;t t
-s,@LIBOBJS@,$LIBOBJS,;t t
-s,@LTLIBOBJS@,$LTLIBOBJS,;t t
-CEOF
-
-_ACEOF
-
- cat >>$CONFIG_STATUS <<\_ACEOF
- # Split the substitutions into bite-sized pieces for seds with
- # small command number limits, like on Digital OSF/1 and HP-UX.
- ac_max_sed_lines=48
- ac_sed_frag=1 # Number of current file.
- ac_beg=1 # First line for current file.
- ac_end=$ac_max_sed_lines # Line after last line for current file.
- ac_more_lines=:
- ac_sed_cmds=
- while $ac_more_lines; do
- if test $ac_beg -gt 1; then
- sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
- else
- sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
- fi
- if test ! -s $tmp/subs.frag; then
- ac_more_lines=false
- else
- # The purpose of the label and of the branching condition is to
- # speed up the sed processing (if there are no `@' at all, there
- # is no need to browse any of the substitutions).
- # These are the two extra sed commands mentioned above.
- (echo ':t
- /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
- if test -z "$ac_sed_cmds"; then
- ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
- else
- ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
- fi
- ac_sed_frag=`expr $ac_sed_frag + 1`
- ac_beg=$ac_end
- ac_end=`expr $ac_end + $ac_max_sed_lines`
- fi
- done
- if test -z "$ac_sed_cmds"; then
- ac_sed_cmds=cat
- fi
-fi # test -n "$CONFIG_FILES"
-
-_ACEOF
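
Everything above amounts to running each template through one long sed script of `s,@VAR@,value,;t t' commands, split into fragments small enough for seds with command-count limits. A hedged, standalone illustration of the substitution step itself, with invented file contents:

# A toy template and a toy subs.sed in the same style as the generated one.
cat > Makefile.in <<'EOF'
prefix = @prefix@
CXX = @CXX@
EOF
cat > subs.sed <<'EOF'
:t
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
s,@prefix@,/usr/local,;t t
s,@CXX@,g++,;t t
EOF
sed -f subs.sed Makefile.in > Makefile   # @prefix@ and @CXX@ are now expanded
cat Makefile
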
-cat >>$CONFIG_STATUS <<\_ACEOF
-for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
- # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
- case $ac_file in
- - | *:- | *:-:* ) # input from stdin
- cat >$tmp/stdin
- ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- * ) ac_file_in=$ac_file.in ;;
- esac
-
- # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
- ac_dir=`(dirname "$ac_file") 2>/dev/null ||
-$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_file" : 'X\(//\)[^/]' \| \
- X"$ac_file" : 'X\(//\)$' \| \
- X"$ac_file" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$ac_file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p "$ac_dir"
- else
- as_dir="$ac_dir"
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
- { (exit 1); exit 1; }; }; }
-
- ac_builddir=.
-
-if test "$ac_dir" != .; then
- ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
- # A "../" for each directory in $ac_dir_suffix.
- ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
- ac_dir_suffix= ac_top_builddir=
-fi
-
-case $srcdir in
- .) # No --srcdir option. We are building in place.
- ac_srcdir=.
- if test -z "$ac_top_builddir"; then
- ac_top_srcdir=.
- else
- ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
- fi ;;
- [\\/]* | ?:[\\/]* ) # Absolute path.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir ;;
- *) # Relative path.
- ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
-
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
- case "$ac_dir" in
- .) ac_abs_builddir=`pwd`;;
- [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
- *) ac_abs_builddir=`pwd`/"$ac_dir";;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
- case ${ac_top_builddir}. in
- .) ac_abs_top_builddir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
- *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
- case $ac_srcdir in
- .) ac_abs_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
- *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
- case $ac_top_srcdir in
- .) ac_abs_top_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
- *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
- esac;;
-esac
-
-
- case $INSTALL in
- [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
- *) ac_INSTALL=$ac_top_builddir$INSTALL ;;
- esac
-
- if test x"$ac_file" != x-; then
- { echo "$as_me:$LINENO: creating $ac_file" >&5
-echo "$as_me: creating $ac_file" >&6;}
- rm -f "$ac_file"
- fi
- # Let's still pretend it is `configure' which instantiates (i.e., don't
-  # use $as_me); people would be surprised to read:
- # /* config.h. Generated by config.status. */
- if test x"$ac_file" = x-; then
- configure_input=
- else
- configure_input="$ac_file. "
- fi
- configure_input=$configure_input"Generated from `echo $ac_file_in |
- sed 's,.*/,,'` by configure."
-
- # First look for the input files in the build tree, otherwise in the
- # src tree.
- ac_file_inputs=`IFS=:
- for f in $ac_file_in; do
- case $f in
- -) echo $tmp/stdin ;;
- [\\/$]*)
- # Absolute (can't be DOS-style, as IFS=:)
- test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- echo "$f";;
- *) # Relative
- if test -f "$f"; then
- # Build tree
- echo "$f"
- elif test -f "$srcdir/$f"; then
- # Source tree
- echo "$srcdir/$f"
- else
- # /dev/null tree
- { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- fi;;
- esac
- done` || { (exit 1); exit 1; }
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF
- sed "$ac_vpsub
-$extrasub
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-:t
-/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
-s,@configure_input@,$configure_input,;t t
-s,@srcdir@,$ac_srcdir,;t t
-s,@abs_srcdir@,$ac_abs_srcdir,;t t
-s,@top_srcdir@,$ac_top_srcdir,;t t
-s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
-s,@builddir@,$ac_builddir,;t t
-s,@abs_builddir@,$ac_abs_builddir,;t t
-s,@top_builddir@,$ac_top_builddir,;t t
-s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
-s,@INSTALL@,$ac_INSTALL,;t t
-" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
- rm -f $tmp/stdin
- if test x"$ac_file" != x-; then
- mv $tmp/out $ac_file
- else
- cat $tmp/out
- rm -f $tmp/out
- fi
-
-done
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-#
-# CONFIG_HEADER section.
-#
-
-# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
-# NAME is the cpp macro being defined and VALUE is the value it is being given.
-#
-# ac_d sets the value in "#define NAME VALUE" lines.
-ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
-ac_dB='[ ].*$,\1#\2'
-ac_dC=' '
-ac_dD=',;t'
-# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
-ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
-ac_uB='$,\1#\2define\3'
-ac_uC=' '
-ac_uD=',;t'
-
-for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
- # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
- case $ac_file in
- - | *:- | *:-:* ) # input from stdin
- cat >$tmp/stdin
- ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- * ) ac_file_in=$ac_file.in ;;
- esac
-
- test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
-echo "$as_me: creating $ac_file" >&6;}
-
- # First look for the input files in the build tree, otherwise in the
- # src tree.
- ac_file_inputs=`IFS=:
- for f in $ac_file_in; do
- case $f in
- -) echo $tmp/stdin ;;
- [\\/$]*)
- # Absolute (can't be DOS-style, as IFS=:)
- test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- # Do quote $f, to prevent DOS paths from being IFS'd.
- echo "$f";;
- *) # Relative
- if test -f "$f"; then
- # Build tree
- echo "$f"
- elif test -f "$srcdir/$f"; then
- # Source tree
- echo "$srcdir/$f"
- else
- # /dev/null tree
- { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- fi;;
- esac
- done` || { (exit 1); exit 1; }
- # Remove the trailing spaces.
- sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
-
-_ACEOF
-
-# Transform confdefs.h into two sed scripts, `conftest.defines' and
-# `conftest.undefs', that substitutes the proper values into
-# config.h.in to produce config.h. The first handles `#define'
-# templates, and the second `#undef' templates.
-# And first: Protect against being on the right side of a sed subst in
-# config.status. Protect against being in an unquoted here document
-# in config.status.
-rm -f conftest.defines conftest.undefs
-# Using a here document instead of a string reduces the quoting nightmare.
-# Putting comments in sed scripts is not portable.
-#
-# `end' is used to prevent the second main sed command (meant for
-# 0-ary CPP macros) from applying to n-ary macro definitions.
-# See the Autoconf documentation for `clear'.
-cat >confdef2sed.sed <<\_ACEOF
-s/[\\&,]/\\&/g
-s,[\\$`],\\&,g
-t clear
-: clear
-s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
-t end
-s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
-: end
-_ACEOF
-# If some macros were called several times, there might be duplicates of
-# the same #define, which is useless.  Nevertheless, we may not want to
-# sort them, since we want the *last* AC_DEFINE to be honored.
-uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
-sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
-rm -f confdef2sed.sed
-
-# This sed command replaces #undef with comments. This is necessary, for
-# example, in the case of _POSIX_SOURCE, which is predefined and required
-# on some systems where configure will not decide to define it.
-cat >>conftest.undefs <<\_ACEOF
-s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
-_ACEOF
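
The ac_d*/ac_u* fragments above get a macro name and a value glued between them to form one sed command per definition. A hedged, self-contained demo of the `#undef' to `#define' rewrite (HAVE_SRILM and the value 1 are examples; the real bracket expressions may also contain a tab):

cat > config.h.in <<'EOF'
/* flag for SRILM */
#undef HAVE_SRILM
#undef PACKAGE_NAME
EOF
ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
ac_uB='$,\1#\2define\3'
ac_uC=' '
ac_uD=',;t'
# Rewrites only the named macro; the other #undef line is left alone.
sed "${ac_uA}HAVE_SRILM${ac_uB}HAVE_SRILM${ac_uC}1${ac_uD}" config.h.in
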
-
-# Break up conftest.defines because some shells have a limit on the size
-# of here documents, and old seds have small limits too (100 cmds).
-echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
-echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
-echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
-echo ' :' >>$CONFIG_STATUS
-rm -f conftest.tail
-while grep . conftest.defines >/dev/null
-do
- # Write a limited-size here document to $tmp/defines.sed.
- echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
- # Speed up: don't consider the non `#define' lines.
- echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
- # Work around the forget-to-reset-the-flag bug.
- echo 't clr' >>$CONFIG_STATUS
- echo ': clr' >>$CONFIG_STATUS
- sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
- echo 'CEOF
- sed -f $tmp/defines.sed $tmp/in >$tmp/out
- rm -f $tmp/in
- mv $tmp/out $tmp/in
-' >>$CONFIG_STATUS
- sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
- rm -f conftest.defines
- mv conftest.tail conftest.defines
-done
-rm -f conftest.defines
-echo ' fi # grep' >>$CONFIG_STATUS
-echo >>$CONFIG_STATUS
-
-# Break up conftest.undefs because some shells have a limit on the size
-# of here documents, and old seds have small limits too (100 cmds).
-echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
-rm -f conftest.tail
-while grep . conftest.undefs >/dev/null
-do
- # Write a limited-size here document to $tmp/undefs.sed.
- echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
-  # Speed up: don't consider the non `#undef' lines.
- echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
- # Work around the forget-to-reset-the-flag bug.
- echo 't clr' >>$CONFIG_STATUS
- echo ': clr' >>$CONFIG_STATUS
- sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
- echo 'CEOF
- sed -f $tmp/undefs.sed $tmp/in >$tmp/out
- rm -f $tmp/in
- mv $tmp/out $tmp/in
-' >>$CONFIG_STATUS
- sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
- rm -f conftest.undefs
- mv conftest.tail conftest.undefs
-done
-rm -f conftest.undefs
-
-cat >>$CONFIG_STATUS <<\_ACEOF
- # Let's still pretend it is `configure' which instantiates (i.e., don't
-  # use $as_me); people would be surprised to read:
- # /* config.h. Generated by config.status. */
- if test x"$ac_file" = x-; then
- echo "/* Generated by configure. */" >$tmp/config.h
- else
- echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
- fi
- cat $tmp/in >>$tmp/config.h
- rm -f $tmp/in
- if test x"$ac_file" != x-; then
- if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
- { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
-echo "$as_me: $ac_file is unchanged" >&6;}
- else
- ac_dir=`(dirname "$ac_file") 2>/dev/null ||
-$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_file" : 'X\(//\)[^/]' \| \
- X"$ac_file" : 'X\(//\)$' \| \
- X"$ac_file" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$ac_file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p "$ac_dir"
- else
- as_dir="$ac_dir"
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
- { (exit 1); exit 1; }; }; }
-
- rm -f $ac_file
- mv $tmp/config.h $ac_file
- fi
- else
- cat $tmp/config.h
- rm -f $tmp/config.h
- fi
-# Compute $ac_file's index in $config_headers.
-_am_stamp_count=1
-for _am_header in $config_headers :; do
- case $_am_header in
- $ac_file | $ac_file:* )
- break ;;
- * )
- _am_stamp_count=`expr $_am_stamp_count + 1` ;;
- esac
-done
-echo "timestamp for $ac_file" >`(dirname $ac_file) 2>/dev/null ||
-$as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X$ac_file : 'X\(//\)[^/]' \| \
- X$ac_file : 'X\(//\)$' \| \
- X$ac_file : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X$ac_file |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`/stamp-h$_am_stamp_count
-done
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-#
-# CONFIG_COMMANDS section.
-#
-for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue
- ac_dest=`echo "$ac_file" | sed 's,:.*,,'`
- ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_dir=`(dirname "$ac_dest") 2>/dev/null ||
-$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_dest" : 'X\(//\)[^/]' \| \
- X"$ac_dest" : 'X\(//\)$' \| \
- X"$ac_dest" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$ac_dest" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p "$ac_dir"
- else
- as_dir="$ac_dir"
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
- { (exit 1); exit 1; }; }; }
-
- ac_builddir=.
-
-if test "$ac_dir" != .; then
- ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
- # A "../" for each directory in $ac_dir_suffix.
- ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
- ac_dir_suffix= ac_top_builddir=
-fi
-
-case $srcdir in
- .) # No --srcdir option. We are building in place.
- ac_srcdir=.
- if test -z "$ac_top_builddir"; then
- ac_top_srcdir=.
- else
- ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
- fi ;;
- [\\/]* | ?:[\\/]* ) # Absolute path.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir ;;
- *) # Relative path.
- ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
-
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
- case "$ac_dir" in
- .) ac_abs_builddir=`pwd`;;
- [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
- *) ac_abs_builddir=`pwd`/"$ac_dir";;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
- case ${ac_top_builddir}. in
- .) ac_abs_top_builddir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
- *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
- case $ac_srcdir in
- .) ac_abs_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
- *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
- esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
- case $ac_top_srcdir in
- .) ac_abs_top_srcdir=$ac_abs_builddir;;
- [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
- *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
- esac;;
-esac
-
-
- { echo "$as_me:$LINENO: executing $ac_dest commands" >&5
-echo "$as_me: executing $ac_dest commands" >&6;}
- case $ac_dest in
- depfiles ) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do
- # Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named `Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
-  # So let's grep the whole file.
- if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then
- dirpart=`(dirname "$mf") 2>/dev/null ||
-$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$mf" : 'X\(//\)[^/]' \| \
- X"$mf" : 'X\(//\)$' \| \
- X"$mf" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$mf" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running `make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # When using ansi2knr, U may be empty or an underscore; expand it
- U=`sed -n 's/^U = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`(dirname "$file") 2>/dev/null ||
-$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$file" : 'X\(//\)[^/]' \| \
- X"$file" : 'X\(//\)$' \| \
- X"$file" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p $dirpart/$fdir
- else
- as_dir=$dirpart/$fdir
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory $dirpart/$fdir" >&5
-echo "$as_me: error: cannot create directory $dirpart/$fdir" >&2;}
- { (exit 1); exit 1; }; }; }
-
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
-done
- ;;
- esac
-done
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-{ (exit 0); exit 0; }
-_ACEOF
-chmod +x $CONFIG_STATUS
-ac_clean_files=$ac_clean_files_save
-
-
-# configure is writing to config.log, and then calls config.status.
-# config.status does its own redirection, appending to config.log.
-# Unfortunately, on DOS this fails, as config.log is still kept open
-# by configure, so config.status won't be able to write to it; its
-# output is simply discarded. So we exec the FD to /dev/null,
-# effectively closing config.log, so it can be properly (re)opened and
-# appended to by config.status. When coming back to configure, we
-# need to make the FD available again.
-if test "$no_create" != yes; then
- ac_cs_success=:
- ac_config_status_args=
- test "$silent" = yes &&
- ac_config_status_args="$ac_config_status_args --quiet"
- exec 5>/dev/null
- $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
- exec 5>>config.log
- # Use ||, not &&, to avoid exiting from the if with $? = 1, which
- # would make configure fail if this is the last instruction.
- $ac_cs_success || { (exit 1); exit 1; }
-fi
-
diff --git a/moses-cmd/configure.in b/moses-cmd/configure.in
deleted file mode 100644
index ea1f8bd74..000000000
--- a/moses-cmd/configure.in
+++ /dev/null
@@ -1,129 +0,0 @@
-AC_INIT(src)
-
-AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(moses, 0.1)
-
-AC_PROG_CXX
-AC_LANG_CPLUSPLUS
-AC_PROG_RANLIB
-#AM_PROG_LIBTOOL
-
-AC_ARG_WITH(srilm,
- [AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
- [with_srilm=$withval],
- [with_srilm=no]
- )
-
-AC_ARG_WITH(boost,
- [AC_HELP_STRING([--with-boost=PATH], [path to BOOST libraries])],
- [with_boost=$withval],
- [with_boost=no]
- )
-
-AC_ARG_WITH(irstlm,
- [AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])],
- [with_irstlm=$withval],
- [with_irstlm=no]
- )
-
-AC_ARG_WITH(moses,
- [AC_HELP_STRING([--with-moses=PATH], [path to moses library])],
- [with_moses=$withval],
- [with_moses=no]
- )
-
-AC_ARG_ENABLE(profiling,
- [AC_HELP_STRING([--enable-profiling], [moses will dump profiling info])],
- [CPPFLAGS="$CPPFLAGS -pg"; LDFLAGS="$LDFLAGS -pg" ]
- )
-
-AC_ARG_ENABLE(optimization,
- [AC_HELP_STRING([--enable-optimization], [compile with -O3 flag])],
- [CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
- )
-
-AC_ARG_ENABLE(mysql, [AC_HELP_STRING([--enable-mysql], [(optional) build in MySQL support])],
- [mysql_flag=yes], [mysql_flag=no])
-
-if test "x$with_boost" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_boost} -I${with_boost}/include"
- LDFLAGS="$LDFLAGS -L${with_boost}/lib -L${with_boost}/stage/lib"
-fi
-
-if test "x$with_moses" != 'xno'
-then
- CPPFLAGS="$CPPFLAGS -I${with_moses}/src"
- LDFLAGS="$LDFLAGS -L${with_moses}/src"
-fi
-LIBS="$LIBS -lmoses"
-
-AC_CHECK_HEADER([Manager.h], [], [AC_MSG_ERROR([Cannot find moses headers! Use --with-moses=PATH])])
-AC_CHECK_HEADER([boost/algorithm/string.hpp], [], [AC_MSG_ERROR([Cannot find boost. Use --with-boost=PATH])])
-AC_CHECK_HEADER([boost/iostreams/filtering_stream.hpp], [], [AC_MSG_ERROR([Cannot find boost. Use --with-boost=PATH])])
-
-if test "$mysql_flag" = 'yes'
-then
- AC_MYSQLCLIENT(,,
- [AC_ERROR([Could not locate mysql client libraries. Try --with-mysql-prefix/-include/-lib])])
- LDFLAGS="$LDFLAGS $MYSQLCLIENT_LDFLAGS"
- CPPFLAGS="$CPPFLAGS $MYSQLCLIENT_CPPFLAGS"
- LIBS="$LIBS $MYSQLCLIENT_LIBS"
-
- MYSQLPP_DEVEL
- LIBS="$LIBS -lmysqlpp"
-
- AM_CONDITIONAL([BUILD_MYSQL_SUPPORT], true)
-else
- AM_CONDITIONAL([BUILD_MYSQL_SUPPORT], false)
-fi
-
-if test "x$with_srilm" != 'xno'
-then
- SAVE_CPPFLAGS="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
-
- AC_CHECK_HEADER(Ngram.h,
- [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
- [AC_MSG_ERROR([Cannot find SRILM!])])
-
- LIB_SRILM="-loolm -ldstruct -lmisc"
- # ROOT/lib/i686-m64/liboolm.a
- # ROOT/lib/i686-m64/libdstruct.a
- # ROOT/lib/i686-m64/libmisc.a
- MY_ARCH=`${with_srilm}/sbin/machine-type`
- LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
- LIBS="$LIBS $LIB_SRILM"
- FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
- AM_CONDITIONAL([SRI_LM], true)
- AM_CONDITIONAL([INTERNAL_LM], false)
- AM_CONDITIONAL([IRST_LM], false)
-else if test "x$with_irstlm" != 'xno'
-then
- SAVE_CPPFLAGS="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
-
- AC_CHECK_HEADER(n_gram.h,
- [AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
- [AC_MSG_ERROR([Cannot find IRST-LM!])])
-
- LIB_SRILM="-lirstlm"
- LDFLAGS="$LDFLAGS -L${with_irstlm}/lib"
- LIBS="$LIBS $LIB_SRILM"
- FMTLIBS="$FMTLIBS libirstlm.a"
- AM_CONDITIONAL([SRI_LM], false)
- AM_CONDITIONAL([INTERNAL_LM], false)
- AM_CONDITIONAL([IRST_LM], true)
-else
- echo "Using internal language model (use --with-srilm or --with-irstlm to change)!"
- AM_CONDITIONAL([SRI_LM], false)
- AM_CONDITIONAL([INTERNAL_LM], true)
- AM_CONDITIONAL([IRST_LM], false)
-fi
-fi
-
-LIBS="$LIBS -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz"
-FMT_LIBS="$FMT_LIBS libboost_iostreams.a libboost_filesystem.a libboost_thread.a"
-
-
-AC_OUTPUT(Makefile src/Makefile)
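
For context, a hypothetical invocation of the configure script generated from this configure.in; every path below is a placeholder, not a value taken from the source:

./configure \
  --with-moses=$HOME/workspace/moses \
  --with-boost=/opt/boost \
  --with-srilm=$HOME/srilm \
  --enable-optimization
make
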
diff --git a/moses-cmd/depcomp b/moses-cmd/depcomp
deleted file mode 100755
index 4c20c6c94..000000000
--- a/moses-cmd/depcomp
+++ /dev/null
@@ -1,441 +0,0 @@
-#! /bin/sh
-
-# depcomp - compile a program generating dependencies as side-effects
-# Copyright 1999, 2000 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
-
-if test -z "$depmode" || test -z "$source" || test -z "$object"; then
- echo "depcomp: Variables source, object and depmode must be set" 1>&2
- exit 1
-fi
-# `libtool' can also be set to `yes' or `no'.
-
-depfile=${depfile-`echo "$object" | sed 's,\([^/]*\)$,.deps/\1,;s/\.\([^.]*\)$/.P\1/'`}
-tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
-
-rm -f "$tmpdepfile"
-
-# Some modes work just like other modes, but use different flags. We
-# parameterize here, but still list the modes in the big case below,
-# to make depend.m4 easier to write. Note that we *cannot* use a case
-# here, because this file can only contain one case statement.
-if test "$depmode" = hp; then
- # HP compiler uses -M and no extra arg.
- gccflag=-M
- depmode=gcc
-fi
-
-if test "$depmode" = dashXmstdout; then
- # This is just like dashmstdout with a different argument.
- dashmflag=-xM
- depmode=dashmstdout
-fi
-
-case "$depmode" in
-gcc3)
-## gcc 3 implements dependency tracking that does exactly what
-## we want. Yay! Note: for some reason libtool 1.4 doesn't like
-## it if -MD -MP comes after the -MF stuff. Hmm.
- "$@" -MT "$object" -MD -MP -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- mv "$tmpdepfile" "$depfile"
- ;;
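
For a gcc-3-style compiler the branch above reduces to one extra set of flags plus a rename. A hedged sketch, assuming a foo.cpp exists; the file names and the .deps layout are placeholders:

mkdir -p .deps
g++ -MT foo.o -MD -MP -MF .deps/foo.TPo -c -o foo.o foo.cpp \
  && mv .deps/foo.TPo .deps/foo.Po \
  || { rm -f .deps/foo.TPo; exit 1; }
# .deps/foo.Po now lists foo.cpp and every header it includes, each header
# also appearing as an empty phony target (that is what -MP adds).
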
-
-gcc)
-## There are various ways to get dependency output from gcc. Here's
-## why we pick this rather obscure method:
-## - Don't want to use -MD because we'd like the dependencies to end
-## up in a subdir. Having to rename by hand is ugly.
-## (We might end up doing this anyway to support other compilers.)
-## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
-## -MM, not -M (despite what the docs say).
-## - Using -M directly means running the compiler twice (even worse
-## than renaming).
- if test -z "$gccflag"; then
- gccflag=-MD,
- fi
- "$@" -Wp,"$gccflag$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-## The second -e expression handles DOS-style file names with drive letters.
- sed -e 's/^[^:]*: / /' \
- -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
-## This next piece of magic avoids the `deleted header file' problem.
-## The problem is that when a header file which appears in a .P file
-## is deleted, the dependency causes make to die (because there is
-## typically no way to rebuild the header). We avoid this by adding
-## dummy dependencies for each header file. Too bad gcc doesn't do
-## this for us directly.
- tr ' ' '
-' < "$tmpdepfile" |
-## Some versions of gcc put a space before the `:'. On the theory
-## that the space means something, we add a space to the output as
-## well.
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-hp)
- # This case exists only to let depend.m4 do its work. It works by
- # looking at the text of this script. This case will never be run,
- # since it is checked for above.
- exit 1
- ;;
-
-sgi)
- if test "$libtool" = yes; then
- "$@" "-Wp,-MDupdate,$tmpdepfile"
- else
- "$@" -MDupdate "$tmpdepfile"
- fi
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
-
- if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
- echo "$object : \\" > "$depfile"
-
- # Clip off the initial element (the dependent). Don't try to be
- # clever and replace this with sed code, as IRIX sed won't handle
- # lines with more than a fixed number of characters (4096 in
- # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
- # the IRIX cc adds comments like `#:fec' to the end of the
- # dependency line.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
- tr '
-' ' ' >> $depfile
- echo >> $depfile
-
- # The second pass generates a dummy entry for each header file.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
- >> $depfile
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-aix)
- # The C for AIX Compiler uses -M and outputs the dependencies
- # in a .u file. This file always lives in the current directory.
- # Also, the AIX compiler puts `$object:' at the start of each line;
- # $object doesn't have directory information.
- stripped=`echo "$object" | sed -e 's,^.*/,,' -e 's/\(.*\)\..*$/\1/'`
- tmpdepfile="$stripped.u"
- outname="$stripped.o"
- if test "$libtool" = yes; then
- "$@" -Wc,-M
- else
- "$@" -M
- fi
-
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
-
- if test -f "$tmpdepfile"; then
- # Each line is of the form `foo.o: dependent.h'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile"
- sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-icc)
- # Must come before tru64.
-
- # Intel's C compiler understands `-MD -MF file'. However
- # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
- # will fill foo.d with something like
- # foo.o: sub/foo.c
- # foo.o: sub/foo.h
- # which is wrong. We want:
- # sub/foo.o: sub/foo.c
- # sub/foo.o: sub/foo.h
- # sub/foo.c:
- # sub/foo.h:
-
- "$@" -MD -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- # Each line is of the form `foo.o: dependent.h'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed -e "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
- sed -e "s,^[^:]*: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-tru64)
-   # The Tru64 compiler uses -MD to generate dependencies as a side
- # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
- # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
- # dependencies in `foo.d' instead, so we check for that too.
- # Subdirectories are respected.
-
- tmpdepfile1="$object.d"
- tmpdepfile2=`echo "$object" | sed -e 's/.o$/.d/'`
- if test "$libtool" = yes; then
- "$@" -Wc,-MD
- else
- "$@" -MD
- fi
-
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2"
- exit $stat
- fi
-
- if test -f "$tmpdepfile1"; then
- tmpdepfile="$tmpdepfile1"
- else
- tmpdepfile="$tmpdepfile2"
- fi
- if test -f "$tmpdepfile"; then
- sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
- # That's a space and a tab in the [].
- sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
- else
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-#nosideeffect)
- # This comment above is used by automake to tell side-effect
- # dependency tracking mechanisms from slower ones.
-
-dashmstdout)
- # Important note: in order to support this mode, a compiler *must*
-  # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- test -z "$dashmflag" && dashmflag=-M
- ( IFS=" "
- case " $* " in
- *" --mode=compile "*) # this is libtool, let us make it quiet
- for arg
- do # cycle over the arguments
- case "$arg" in
- "--mode=compile")
- # insert --quiet before "--mode=compile"
- set fnord "$@" --quiet
- shift # fnord
- ;;
- esac
- set fnord "$@" "$arg"
- shift # fnord
- shift # "$arg"
- done
- ;;
- esac
- "$@" $dashmflag | sed 's:^[^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
- ) &
- proc=$!
- "$@"
- stat=$?
- wait "$proc"
- if test "$stat" != 0; then exit $stat; fi
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- tr ' ' '
-' < "$tmpdepfile" | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-dashXmstdout)
- # This case only exists to satisfy depend.m4. It is never actually
- # run, as this mode is specially recognized in the preamble.
- exit 1
- ;;
-
-makedepend)
- # X makedepend
- (
- shift
- cleared=no
- for arg in "$@"; do
- case $cleared in no)
- set ""; shift
- cleared=yes
- esac
- case "$arg" in
- -D*|-I*)
- set fnord "$@" "$arg"; shift;;
- -*)
- ;;
- *)
- set fnord "$@" "$arg"; shift;;
- esac
- done
- obj_suffix="`echo $object | sed 's/^.*\././'`"
- touch "$tmpdepfile"
- ${MAKEDEPEND-makedepend} 2>/dev/null -o"$obj_suffix" -f"$tmpdepfile" "$@"
- ) &
- proc=$!
- "$@"
- stat=$?
- wait "$proc"
- if test "$stat" != 0; then exit $stat; fi
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- tail +3 "$tmpdepfile" | tr ' ' '
-' | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile" "$tmpdepfile".bak
- ;;
-
-cpp)
- # Important note: in order to support this mode, a compiler *must*
-  # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- ( IFS=" "
- case " $* " in
- *" --mode=compile "*)
- for arg
- do # cycle over the arguments
- case $arg in
- "--mode=compile")
- # insert --quiet before "--mode=compile"
- set fnord "$@" --quiet
- shift # fnord
- ;;
- esac
- set fnord "$@" "$arg"
- shift # fnord
- shift # "$arg"
- done
- ;;
- esac
- "$@" -E |
- sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
- sed '$ s: \\$::' > "$tmpdepfile"
- ) &
- proc=$!
- "$@"
- stat=$?
- wait "$proc"
- if test "$stat" != 0; then exit $stat; fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- cat < "$tmpdepfile" >> "$depfile"
- sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-msvisualcpp)
- # Important note: in order to support this mode, a compiler *must*
-  # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- ( IFS=" "
- case " $* " in
- *" --mode=compile "*)
- for arg
- do # cycle over the arguments
- case $arg in
- "--mode=compile")
- # insert --quiet before "--mode=compile"
- set fnord "$@" --quiet
- shift # fnord
- ;;
- esac
- set fnord "$@" "$arg"
- shift # fnord
- shift # "$arg"
- done
- ;;
- esac
- "$@" -E |
- sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
- ) &
- proc=$!
- "$@"
- stat=$?
- wait "$proc"
- if test "$stat" != 0; then exit $stat; fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
- echo " " >> "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-none)
- exec "$@"
- ;;
-
-*)
- echo "Unknown depmode $depmode" 1>&2
- exit 1
- ;;
-esac
-
-exit 0
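
A hypothetical invocation in the style automake-generated rules use, with placeholder values; only depmode, source and object are strictly required by the check at the top of the script:

depmode=gcc3 source=foo.cpp object=foo.o \
depfile=.deps/foo.Po tmpdepfile=.deps/foo.TPo libtool=no \
  /bin/sh ./depcomp g++ -c -o foo.o foo.cpp
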
diff --git a/moses-cmd/install-sh b/moses-cmd/install-sh
deleted file mode 100755
index 36f96f3e0..000000000
--- a/moses-cmd/install-sh
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/bin/sh
-#
-# install - install a program, script, or datafile
-# This comes from X11R5 (mit/util/scripts/install.sh).
-#
-# Copyright 1991 by the Massachusetts Institute of Technology
-#
-# Permission to use, copy, modify, distribute, and sell this software and its
-# documentation for any purpose is hereby granted without fee, provided that
-# the above copyright notice appear in all copies and that both that
-# copyright notice and this permission notice appear in supporting
-# documentation, and that the name of M.I.T. not be used in advertising or
-# publicity pertaining to distribution of the software without specific,
-# written prior permission. M.I.T. makes no representations about the
-# suitability of this software for any purpose. It is provided "as is"
-# without express or implied warranty.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch. It can only install one file at a time, a restriction
-# shared with many OS's install programs.
-
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit="${DOITPROG-}"
-
-
-# put in absolute paths if you don't have them in your path; or use env. vars.
-
-mvprog="${MVPROG-mv}"
-cpprog="${CPPROG-cp}"
-chmodprog="${CHMODPROG-chmod}"
-chownprog="${CHOWNPROG-chown}"
-chgrpprog="${CHGRPPROG-chgrp}"
-stripprog="${STRIPPROG-strip}"
-rmprog="${RMPROG-rm}"
-mkdirprog="${MKDIRPROG-mkdir}"
-
-transformbasename=""
-transform_arg=""
-instcmd="$mvprog"
-chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
-rmcmd="$rmprog -f"
-mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
-
-while [ x"$1" != x ]; do
- case $1 in
- -c) instcmd=$cpprog
- shift
- continue;;
-
- -d) dir_arg=true
- shift
- continue;;
-
- -m) chmodcmd="$chmodprog $2"
- shift
- shift
- continue;;
-
- -o) chowncmd="$chownprog $2"
- shift
- shift
- continue;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift
- shift
- continue;;
-
- -s) stripcmd=$stripprog
- shift
- continue;;
-
- -t=*) transformarg=`echo $1 | sed 's/-t=//'`
- shift
- continue;;
-
- -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
- shift
- continue;;
-
- *) if [ x"$src" = x ]
- then
- src=$1
- else
- # this colon is to work around a 386BSD /bin/sh bug
- :
- dst=$1
- fi
- shift
- continue;;
- esac
-done
-
-if [ x"$src" = x ]
-then
- echo "$0: no input file specified" >&2
- exit 1
-else
- :
-fi
-
-if [ x"$dir_arg" != x ]; then
- dst=$src
- src=""
-
- if [ -d "$dst" ]; then
- instcmd=:
- chmodcmd=""
- else
- instcmd=$mkdirprog
- fi
-else
-
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad
-# if $src (and thus $dsttmp) contains '*'.
-
- if [ -f "$src" ] || [ -d "$src" ]
- then
- :
- else
- echo "$0: $src does not exist" >&2
- exit 1
- fi
-
- if [ x"$dst" = x ]
- then
- echo "$0: no destination specified" >&2
- exit 1
- else
- :
- fi
-
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
- if [ -d "$dst" ]
- then
- dst=$dst/`basename "$src"`
- else
- :
- fi
-fi
-
-## this sed command emulates the dirname command
-dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
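
A quick hedged check of the dirname emulation above, using an invented path:

dst=/usr/local/bin/moses
dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
echo "$dstdir"    # prints /usr/local/bin; a bare name such as 'moses' would give '.'
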
-
-# Make sure that the destination directory exists.
-# this part is taken from Noah Friedman's mkinstalldirs script
-
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='
- '
-IFS="${IFS-$defaultIFS}"
-
-oIFS=$IFS
-# Some sh's can't handle IFS=/ for some reason.
-IFS='%'
-set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS=$oIFS
-
-pathcomp=''
-
-while [ $# -ne 0 ] ; do
- pathcomp=$pathcomp$1
- shift
-
- if [ ! -d "$pathcomp" ] ;
- then
- $mkdirprog "$pathcomp"
- else
- :
- fi
-
- pathcomp=$pathcomp/
-done
-fi
-
-if [ x"$dir_arg" != x ]
-then
- $doit $instcmd "$dst" &&
-
- if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi
-else
-
-# If we're going to rename the final executable, determine the name now.
-
- if [ x"$transformarg" = x ]
- then
- dstfile=`basename "$dst"`
- else
- dstfile=`basename "$dst" $transformbasename |
- sed $transformarg`$transformbasename
- fi
-
-# don't allow the sed command to completely eliminate the filename
-
- if [ x"$dstfile" = x ]
- then
- dstfile=`basename "$dst"`
- else
- :
- fi
-
-# Make a couple of temp file names in the proper directory.
-
- dsttmp=$dstdir/#inst.$$#
- rmtmp=$dstdir/#rm.$$#
-
-# Trap to clean up temp files at exit.
-
- trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
- trap '(exit $?); exit' 1 2 13 15
-
-# Move or copy the file name to the temp name
-
- $doit $instcmd "$src" "$dsttmp" &&
-
-# and set any options; do chmod last to preserve setuid bits
-
-# If any of these fail, we abort the whole thing. If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
- if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi &&
-
-# Now remove or move aside any old file at destination location. We try this
-# two ways since rm can't unlink itself on some systems and the destination
-# file might be busy for other reasons. In this case, the final cleanup
-# might fail but the new file should still install successfully.
-
-{
- if [ -f "$dstdir/$dstfile" ]
- then
- $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null ||
- $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null ||
- {
- echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
- (exit 1); exit
- }
- else
- :
- fi
-} &&
-
-# Now rename the file to the real destination.
-
- $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
-
-fi &&
-
-# The final little trick to "correctly" pass the exit status to the exit trap.
-
-{
- (exit 0); exit
-}
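
The install-sh fragment above follows the classic safe-install pattern: copy the file to a temporary name inside the destination directory, apply the chown/chgrp/strip/chmod options to the temporary copy, move any old file aside, and only then rename the temporary onto the real name, so an interrupted install never leaves a half-written binary in place. The C++17 sketch below is not part of the repository and all paths are placeholders; it only shows the same copy-to-temp-then-rename idea.

    // Sketch of the copy-to-temp-then-rename install pattern used by install-sh.
    // Paths are illustrative placeholders, not anything the build actually uses.
    #include <filesystem>
    #include <iostream>

    namespace fs = std::filesystem;

    int main() {
        const fs::path src = "moses";                          // freshly built program (assumed)
        const fs::path dst = "/usr/local/bin/moses";           // install destination (assumed)
        const fs::path tmp = dst.parent_path() / "#inst.tmp#"; // stands in for #inst.$$#

        std::error_code ec;
        fs::create_directories(dst.parent_path(), ec);         // the mkinstalldirs step
        fs::copy_file(src, tmp, fs::copy_options::overwrite_existing, ec);
        if (ec) { std::cerr << "copy failed: " << ec.message() << "\n"; return 1; }

        // chown/chgrp/strip/chmod would be applied to tmp here, as in the script.

        fs::remove(dst, ec);                                   // unlink or move aside the old file
        fs::rename(tmp, dst, ec);                              // rename onto the final name
        if (ec) { std::cerr << "rename failed: " << ec.message() << "\n"; fs::remove(tmp); return 1; }
        return 0;
    }
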
diff --git a/moses-cmd/missing b/moses-cmd/missing
deleted file mode 100755
index 6a37006e8..000000000
--- a/moses-cmd/missing
+++ /dev/null
@@ -1,336 +0,0 @@
-#! /bin/sh
-# Common stub for a few missing GNU programs while installing.
-# Copyright (C) 1996, 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
-# Originally by François Pinard <pinard@iro.umontreal.ca>, 1996.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-if test $# -eq 0; then
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
-fi
-
-run=:
-
-# In the cases where this matters, `missing' is being run in the
-# srcdir already.
-if test -f configure.ac; then
- configure_ac=configure.ac
-else
- configure_ac=configure.in
-fi
-
-case "$1" in
---run)
- # Try to run requested program, and just exit if it succeeds.
- run=
- shift
- "$@" && exit 0
- ;;
-esac
-
-# If it does not exist, or fails to run (possibly an outdated version),
-# try to emulate it.
-case "$1" in
-
- -h|--h|--he|--hel|--help)
- echo "\
-$0 [OPTION]... PROGRAM [ARGUMENT]...
-
-Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
-error status if there is no known handling for PROGRAM.
-
-Options:
- -h, --help display this help and exit
- -v, --version output version information and exit
- --run try to run the given command, and emulate it if it fails
-
-Supported PROGRAM values:
- aclocal touch file \`aclocal.m4'
- autoconf touch file \`configure'
- autoheader touch file \`config.h.in'
- automake touch all \`Makefile.in' files
- bison create \`y.tab.[ch]', if possible, from existing .[ch]
- flex create \`lex.yy.c', if possible, from existing .c
- help2man touch the output file
- lex create \`lex.yy.c', if possible, from existing .c
- makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
- yacc create \`y.tab.[ch]', if possible, from existing .[ch]"
- ;;
-
- -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
- echo "missing 0.4 - GNU automake"
- ;;
-
- -*)
- echo 1>&2 "$0: Unknown \`$1' option"
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
- ;;
-
- aclocal*)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified \`acinclude.m4' or \`${configure_ac}'. You might want
- to install the \`Automake' and \`Perl' packages. Grab them from
- any GNU archive site."
- touch aclocal.m4
- ;;
-
- autoconf)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified \`${configure_ac}'. You might want to install the
- \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
- archive site."
- touch configure
- ;;
-
- autoheader)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified \`acconfig.h' or \`${configure_ac}'. You might want
- to install the \`Autoconf' and \`GNU m4' packages. Grab them
- from any GNU archive site."
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
- test -z "$files" && files="config.h"
- touch_files=
- for f in $files; do
- case "$f" in
- *:*) touch_files="$touch_files "`echo "$f" |
- sed -e 's/^[^:]*://' -e 's/:.*//'`;;
- *) touch_files="$touch_files $f.in";;
- esac
- done
- touch $touch_files
- ;;
-
- automake*)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
- You might want to install the \`Automake' and \`Perl' packages.
- Grab them from any GNU archive site."
- find . -type f -name Makefile.am -print |
- sed 's/\.am$/.in/' |
- while read f; do touch "$f"; done
- ;;
-
- autom4te)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is needed, and you do not seem to have it handy on your
- system. You might have modified some files without having the
- proper tools for further handling them.
- You can get \`$1' as part of \`Autoconf' from any GNU
- archive site."
-
- file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
- test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo "#! /bin/sh"
- echo "# Created by GNU Automake missing as a replacement of"
- echo "# $ $@"
- echo "exit 0"
- chmod +x $file
- exit 1
- fi
- ;;
-
- bison|yacc)
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified a \`.y' file. You may need the \`Bison' package
- in order for those modifications to take effect. You can get
- \`Bison' from any GNU archive site."
- rm -f y.tab.c y.tab.h
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.y)
- SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.c
- fi
- SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.h
- fi
- ;;
- esac
- fi
- if [ ! -f y.tab.h ]; then
- echo >y.tab.h
- fi
- if [ ! -f y.tab.c ]; then
- echo 'main() { return 0; }' >y.tab.c
- fi
- ;;
-
- lex|flex)
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified a \`.l' file. You may need the \`Flex' package
- in order for those modifications to take effect. You can get
- \`Flex' from any GNU archive site."
- rm -f lex.yy.c
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.l)
- SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" lex.yy.c
- fi
- ;;
- esac
- fi
- if [ ! -f lex.yy.c ]; then
- echo 'main() { return 0; }' >lex.yy.c
- fi
- ;;
-
- help2man)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified a dependency of a manual page. You may need the
- \`Help2man' package in order for those modifications to take
- effect. You can get \`Help2man' from any GNU archive site."
-
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
- fi
- if [ -f "$file" ]; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo ".ab help2man is required to generate this page"
- exit 1
- fi
- ;;
-
- makeinfo)
- if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then
- # We have makeinfo, but it failed.
- exit 1
- fi
-
- echo 1>&2 "\
-WARNING: \`$1' is missing on your system. You should only need it if
- you modified a \`.texi' or \`.texinfo' file, or any other file
- indirectly affecting the content of the manual. The spurious
- call might also be the consequence of using a buggy \`make' (AIX,
- DU, IRIX). You might want to install the \`Texinfo' package or
- the \`GNU make' package. Grab either from any GNU archive site."
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
- file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
- fi
- touch $file
- ;;
-
- tar)
- shift
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- fi
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case "$firstarg" in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case "$firstarg" in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
- *)
- echo 1>&2 "\
-WARNING: \`$1' is needed, and you do not seem to have it handy on your
- system. You might have modified some files without having the
- proper tools for further handling them. Check the \`README' file,
- it often tells you about the needed prerequisites for installing
- this package. You may also peek at any GNU archive site, in case
- some other package would contain this missing \`$1' program."
- exit 1
- ;;
-esac
-
-exit 0
diff --git a/moses-cmd/moses-cmd.vcproj b/moses-cmd/moses-cmd.vcproj
deleted file mode 100644
index 5467f59dd..000000000
--- a/moses-cmd/moses-cmd.vcproj
+++ /dev/null
@@ -1,231 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="8.00"
- Name="moses-cmd"
- ProjectGUID="{5BBAC701-84F5-438E-8F54-B3391F451F2A}"
- RootNamespace="mosescmd"
- Keyword="Win32Proj"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="1"
- CharacterSet="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- AdditionalIncludeDirectories="&quot;$(SolutionDir)\src&quot;;&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;"
- PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;LM_IRST;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- MinimalRebuild="true"
- BasicRuntimeChecks="3"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- Detect64BitPortabilityProblems="true"
- DebugInformationFormat="4"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- AdditionalDependencies="&quot;$(SolutionDir)\$(ConfigurationName)\moses.lib&quot; &quot;$(SolutionDir)\$(ConfigurationName)\irstlm.lib&quot; mysqlpp.lib libmysql.lib libboost_thread-vc80-mt-gd.lib"
- OutputFile="$(ProjectDir)\$(ProjectName).exe"
- LinkIncremental="2"
- GenerateDebugInformation="true"
- SubSystem="1"
- TargetMachine="1"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="1"
- CharacterSet="1"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- AdditionalIncludeDirectories="&quot;$(SolutionDir)\src&quot;;&quot;$(SolutionDir)..\mysqlpp\lib&quot;;&quot;$(SolutionDir)..\irstlm\src&quot;"
- PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;LM_IRST;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- Detect64BitPortabilityProblems="true"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLinkerTool"
- AdditionalDependencies="&quot;$(SolutionDir)\$(ConfigurationName)\moses.lib&quot; &quot;$(SolutionDir)\$(ConfigurationName)\irstlm.lib&quot; mysqlpp.lib libmysql.lib libboost_thread-vc80-mt.lib"
- OutputFile="$(ProjectDir)\$(ProjectName).exe"
- LinkIncremental="1"
- GenerateDebugInformation="true"
- SubSystem="1"
- OptimizeReferences="2"
- EnableCOMDATFolding="2"
- TargetMachine="1"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCManifestTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCAppVerifierTool"
- />
- <Tool
- Name="VCWebDeploymentTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
- UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
- >
- <File
- RelativePath=".\src\IOCommandLine.cpp"
- >
- </File>
- <File
- RelativePath=".\src\IOFile.cpp"
- >
- </File>
- <File
- RelativePath=".\src\Main.cpp"
- >
- </File>
- <File
- RelativePath=".\src\TranslationAnalysis.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl;inc;xsd"
- UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
- >
- <File
- RelativePath=".\src\IOCommandLine.h"
- >
- </File>
- <File
- RelativePath=".\src\IOFile.h"
- >
- </File>
- <File
- RelativePath=".\src\Main.h"
- >
- </File>
- <File
- RelativePath=".\src\TranslationAnalysis.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/moses-cmd/regenerate-makefiles.sh b/moses-cmd/regenerate-makefiles.sh
deleted file mode 100755
index e217cbbd9..000000000
--- a/moses-cmd/regenerate-makefiles.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-echo "Calling autoconf..."
-autoconf
-echo "Calling automake..."
-automake
-
-echo
-echo "You should now be able to configure and build:"
-echo " env LDFLAGS=-static ./configure --with-boost=/home/ws06/cdyer/boost-stage --with-srilm=/home/ws06/cdyer/srilm --with-moses=\`pwd\`/../moses"
-echo " make -j 4"
-echo
diff --git a/moses-cmd/src/IOCommandLine.cpp b/moses-cmd/src/IOCommandLine.cpp
deleted file mode 100755
index 17c20739c..000000000
--- a/moses-cmd/src/IOCommandLine.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#include <iostream>
-#include "TypeDef.h"
-#include "Util.h"
-#include "IOCommandLine.h"
-#include "Hypothesis.h"
-#include "WordsRange.h"
-#include "LatticePathList.h"
-#include "StaticData.h"
-#include "DummyScoreProducers.h"
-
-using namespace std;
-
-IOCommandLine::IOCommandLine(
- const vector<FactorType> &inputFactorOrder
- , const vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , FactorCollection &factorCollection
- , size_t nBestSize
- , const string &nBestFilePath)
-:m_inputFactorOrder(inputFactorOrder)
-,m_outputFactorOrder(outputFactorOrder)
-,m_inputFactorUsed(inputFactorUsed)
-,m_factorCollection(factorCollection)
-{
- if (nBestSize > 0)
- {
- m_nBestFile.open(nBestFilePath.c_str());
- }
-}
-
-InputType* IOCommandLine::GetInput(InputType* in)
-{
- return InputOutput::GetInput(in,std::cin,m_inputFactorOrder, m_factorCollection);
-}
-
-/***
- * print surface factor only for the given phrase
- */
-void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
-{
- assert(outputFactorOrder.size() > 0);
- if (reportAllFactors == true)
- {
- out << phrase;
- }
- else
- {
- size_t size = phrase.GetSize();
- for (size_t pos = 0 ; pos < size ; pos++)
- {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
- out << *factor;
-
- for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
- {
- const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
- out << "|" << *factor;
- }
- out << " ";
- }
- }
-}
-
-void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
- ,bool reportSourceSpan, bool reportAllFactors)
-{
- if ( hypo != NULL)
- {
- OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
- OutputSurface(out, hypo->GetTargetPhrase(), outputFactorOrder, reportAllFactors);
-
- if (reportSourceSpan == true
- && hypo->GetTargetPhrase().GetSize() > 0) {
- out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
- << "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
- }
- }
-}
-
-void IOCommandLine::Backtrack(const Hypothesis *hypo){
-
- if (hypo->GetPrevHypo() != NULL) {
- TRACE_ERR("[" << hypo->m_id << " => " << hypo->GetPrevHypo()->m_id << "]" << endl);
- Backtrack(hypo->GetPrevHypo());
- }
-}
-
-void IOCommandLine::SetOutput(const Hypothesis *hypo, long /*translationId*/, bool reportSourceSpan, bool reportAllFactors)
-{
- if (hypo != NULL)
- {
- TRACE_ERR("BEST HYPO: " << *hypo << endl);
- TRACE_ERR(hypo->GetScoreBreakdown() << std::endl);
- Backtrack(hypo);
-
- OutputSurface(cout, hypo, m_outputFactorOrder, reportSourceSpan, reportAllFactors);
- }
- else
- {
- TRACE_ERR("NO BEST HYPO" << endl);
- }
-
- cout << endl;
-}
-
-void IOCommandLine::SetNBest(const LatticePathList &nBestList, long translationId)
-{
- bool labeledOutput = StaticData::Instance()->IsLabeledNBestList();
-
- LatticePathList::const_iterator iter;
- for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
- {
- const LatticePath &path = **iter;
- const std::vector<const Hypothesis *> &edges = path.GetEdges();
-
- // print the surface factor of the translation
- m_nBestFile << translationId << " ||| ";
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
- {
- const Hypothesis &edge = *edges[currEdge];
- OutputSurface(m_nBestFile, edge.GetTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
- }
- m_nBestFile << " ||| ";
-
- // print the scores in a hardwired order
- // before each model type, the corresponding command-line-like name must be emitted
- // MERT script relies on this
-
- // basic distortion
- if (labeledOutput)
- m_nBestFile << "d: ";
- m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetDistortionScoreProducer()) << " ";
-
-// reordering
- vector<LexicalReordering*> rms = StaticData::Instance()->GetReorderModels();
- if(rms.size() > 0)
- {
- vector<LexicalReordering*>::iterator iter;
- for(iter = rms.begin(); iter != rms.end(); ++iter)
- {
- vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
- for (size_t j = 0; j<scores.size(); ++j)
- {
- m_nBestFile << scores[j] << " ";
- }
- }
- }
-
- // lm
- const LMList& lml = StaticData::Instance()->GetAllLM();
- if (lml.size() > 0) {
- if (labeledOutput)
- m_nBestFile << "lm: ";
- LMList::const_iterator lmi = lml.begin();
- for (; lmi != lml.end(); ++lmi) {
- m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
- }
- }
-
- // translation components
- vector<PhraseDictionaryBase*> pds = StaticData::Instance()->GetPhraseDictionaries();
- if (pds.size() > 0) {
- if (labeledOutput)
- m_nBestFile << "tm: ";
- vector<PhraseDictionaryBase*>::iterator iter;
- for (iter = pds.begin(); iter != pds.end(); ++iter) {
- vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
- for (size_t j = 0; j<scores.size(); ++j)
- m_nBestFile << scores[j] << " ";
-
- }
- }
-
- // word penalty
- if (labeledOutput)
- m_nBestFile << "w: ";
- m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetWordPenaltyProducer()) << " ";
-
- // generation
- vector<GenerationDictionary*> gds = StaticData::Instance()->GetGenerationDictionaries();
- if (gds.size() > 0) {
- if (labeledOutput)
- m_nBestFile << "g: ";
- vector<GenerationDictionary*>::iterator iter;
- for (iter = gds.begin(); iter != gds.end(); ++iter) {
- vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
- for (size_t j = 0; j<scores.size(); j++) {
- m_nBestFile << scores[j] << " ";
- }
- }
- }
-
- // total
- m_nBestFile << "||| " << path.GetTotalScore() << endl;
- }
-
- m_nBestFile<<std::flush;
-}
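
IOCommandLine::SetNBest above writes one line per n-best candidate in the layout the MERT script relies on: translation id, surface string, the labelled score components in a fixed order (d:, reordering, lm:, tm:, w:, g:), and finally the total score, all separated by " ||| ". A minimal, self-contained sketch of that layout follows; the sentence and every number are invented for illustration.

    // Emits one line in the labelled n-best layout produced by SetNBest.
    // Only the field order and labels mirror the code above; the values are made up.
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        long translationId = 0;
        std::string surface = "this is a small house";           // candidate surface string
        std::vector<double> tmScores = { -2.71, -1.33, -3.05, -1.94, 1.0 };

        std::ostringstream line;
        line << translationId << " ||| " << surface << " ||| ";
        line << "d: " << -4.0 << " ";                            // distortion
        line << "lm: " << -31.2 << " ";                          // language model(s)
        line << "tm: ";
        for (double s : tmScores) line << s << " ";              // translation model components
        line << "w: " << -6.0 << " ";                            // word penalty
        line << "||| " << -18.734;                               // total score
        std::cout << line.str() << std::endl;
        return 0;
    }

When reordering or generation models are configured, their scores appear in the same labelled style (reordering after d:, g: after w:), exactly as in the code above.
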
diff --git a/moses-cmd/src/IOCommandLine.h b/moses-cmd/src/IOCommandLine.h
deleted file mode 100755
index 9cc8c3c1c..000000000
--- a/moses-cmd/src/IOCommandLine.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#pragma once
-
-#include <fstream>
-#include <vector>
-#include <boost/algorithm/string.hpp>
-#include "TypeDef.h"
-#include "InputOutput.h"
-#include "Sentence.h"
-
-class FactorMask;
-class FactorCollection;
-
-class IOCommandLine : public InputOutput
-{
-protected:
- const std::vector<FactorType> &m_inputFactorOrder;
- const std::vector<FactorType> &m_outputFactorOrder;
- const FactorMask &m_inputFactorUsed;
- FactorCollection &m_factorCollection;
- std::ofstream m_nBestFile;
- /***
- * if false, print all factors for best hypotheses (useful for error analysis)
- */
- bool m_printSurfaceOnly;
-
-public:
- IOCommandLine(const std::vector<FactorType> &inputFactorOrder
- , const std::vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , FactorCollection &factorCollection
- , size_t nBestSize
- , const std::string &nBestFilePath);
-
- InputType* GetInput(InputType*);
- void SetOutput(const Hypothesis *hypo, long translationId, bool reportSourceSpan, bool reportAllFactors);
- void SetNBest(const LatticePathList &nBestList, long translationId);
- void Backtrack(const Hypothesis *hypo);
-};
-
-#if 0
-// help fn
-inline Sentence *GetInput(std::istream &inputStream
- , const std::vector<FactorType> &factorOrder
- , FactorCollection &factorCollection)
-{
-
- return dynamic_cast<Sentence*>(GetInput(new Sentence(Input),inputStream,factorOrder,factorCollection));
-#if 0
- Sentence *rv=new Sentence(Input);
- if(rv->Read(inputStream,factorOrder,factorCollection))
- return rv;
- else {delete rv; return 0;}
-#endif
-}
-
-#endif
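
The SetOutput declaration above takes reportSourceSpan and reportAllFactors flags; together with OutputSurface in the .cpp this produces tokens whose output factors are joined by '|', optionally followed by a '|start-end|' marker for the source span each phrase covers. A small illustration, with invented words, factors and span:

    // Illustrates the factored surface output format: factors joined by '|',
    // plus an optional |start-end| source-span marker per phrase.
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
        // one target phrase as (surface, POS) factor pairs, covering source words 2-3
        std::vector<std::pair<std::string, std::string>> phrase = {
            {"small", "JJ"}, {"house", "NN"}
        };
        const bool reportSourceSpan = true;

        for (const auto& word : phrase)
            std::cout << word.first << "|" << word.second << " ";
        if (reportSourceSpan)
            std::cout << "|2-3| ";
        std::cout << "\n";   // prints: small|JJ house|NN |2-3|
        return 0;
    }
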
diff --git a/moses-cmd/src/IOFile.cpp b/moses-cmd/src/IOFile.cpp
deleted file mode 100755
index 805f6fc1e..000000000
--- a/moses-cmd/src/IOFile.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#include "IOFile.h"
-#include "Sentence.h"
-using namespace std;
-
-IOFile::IOFile(const std::vector<FactorType> &inputFactorOrder
- , const std::vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , FactorCollection &factorCollection
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath)
-:IOCommandLine(inputFactorOrder, outputFactorOrder, inputFactorUsed, factorCollection, nBestSize, nBestFilePath)
-,m_inputFilePath(inputFilePath)
-,m_inputFile(inputFilePath)
-{
-}
-
-InputType* IOFile::GetInput(InputType* in)
-{
- return InputOutput::GetInput(in,m_inputFile, m_inputFactorOrder, m_factorCollection);
-}
-
-void IOFile::GetInputPhrase(std::list<Phrase> &inputPhraseList)
-{
- ifstream inputFile(m_inputFilePath.c_str());
- while(Sentence *sentence=dynamic_cast<Sentence*>(InputOutput::GetInput(new Sentence(Input),inputFile, m_inputFactorOrder, m_factorCollection)))
- {
- inputPhraseList.push_back(*sentence);
- Release(sentence);
- }
-}
-
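IOFile::GetInputPhrase above re-opens the input file and reads every sentence into a list before decoding starts, so the phrase tables can be filtered against the whole test set. A stripped-down, self-contained version of that read-everything-first step (the file name is a placeholder):

    // Minimal stand-in for the GetInputPhrase pattern: load all input sentences
    // up front so later stages can see the complete test set.
    #include <fstream>
    #include <iostream>
    #include <list>
    #include <string>

    int main() {
        std::ifstream in("input.txt");                 // placeholder for the input-file path
        std::list<std::string> sentences;
        for (std::string line; std::getline(in, line); )
            if (!line.empty()) sentences.push_back(line);
        std::cerr << "read " << sentences.size() << " input sentences\n";
        return 0;
    }
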
diff --git a/moses-cmd/src/IOFile.h b/moses-cmd/src/IOFile.h
deleted file mode 100755
index 43b6b0ba2..000000000
--- a/moses-cmd/src/IOFile.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#pragma once
-
-#include <fstream>
-#include <vector>
-#include "TypeDef.h"
-#include "IOCommandLine.h"
-#include "InputFileStream.h"
-
-class IOFile : public IOCommandLine
-{
-protected:
- std::string m_inputFilePath;
- InputFileStream m_inputFile;
-public:
- IOFile(const std::vector<FactorType> &inputFactorOrder
- , const std::vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , FactorCollection &factorCollection
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath);
-
- InputType *GetInput(InputType*);
- void GetInputPhrase(std::list<Phrase> &inputPhraseList);
-};
-
diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp
deleted file mode 100644
index 2858909bb..000000000
--- a/moses-cmd/src/Main.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#ifdef WIN32
-// Include Visual Leak Detector
-#include <vld.h>
-#endif
-
-#include <fstream>
-#include "Main.h"
-#include "LatticePath.h"
-#include "FactorCollection.h"
-#include "Manager.h"
-#include "Phrase.h"
-#include "Util.h"
-#include "LatticePathList.h"
-#include "Timer.h"
-#include "IOCommandLine.h"
-#include "IOFile.h"
-#include "Sentence.h"
-#include "ConfusionNet.h"
-#include "TranslationAnalysis.h"
-
-#if HAVE_CONFIG_H
-#include "config.h"
-#else
-// those not using autoconf have to build MySQL support for now
-# define USE_MYSQL 1
-#endif
-
-using namespace std;
-Timer timer;
-
-
-bool readInput(InputOutput *inputOutput, int inputType, InputType*& source)
-{
- delete source;
- source=inputOutput->GetInput((inputType ?
- static_cast<InputType*>(new ConfusionNet) :
- static_cast<InputType*>(new Sentence(Input))));
- return (source ? true : false);
-}
-
-
-int main(int argc, char* argv[])
-{
- // Welcome message
- TRACE_ERR( "Moses (built on " << __DATE__ << ")" << endl );
- TRACE_ERR( "a beam search decoder for phrase-based statistical machine translation models" << endl );
- TRACE_ERR( "written by Hieu Hoang, with contributions by Nicola Bertoldi, Ondrej Bojar," << endl <<
- "Chris Callison-Burch, Alexandra Constantin, Brooke Cowan, Chris Dyer, Marcello Federico," << endl <<
- "Evan Herbst, Philipp Koehn, Christine Moran, Wade Shen, and Richard Zens." << endl);
- TRACE_ERR( "(c) 2006 University of Edinburgh, Scotland" << endl );
- TRACE_ERR( "command: " );
- for(int i=0;i<argc;++i) TRACE_ERR( argv[i]<<" " );
- TRACE_ERR(endl);
-
- // load data structures
- timer.start("Starting...");
- StaticData staticData;
- if (!staticData.LoadParameters(argc, argv))
- return EXIT_FAILURE;
-
- // set up read/writing class
- InputOutput *inputOutput = GetInputOutput(staticData);
-
- std::cerr << "The score component vector looks like this:\n" << staticData.GetScoreIndexManager();
- std::cerr << "The global weight vector looks like this:\n";
- vector<float> weights = staticData.GetAllWeights();
- std::cerr << weights[0];
- for (size_t j=1; j<weights.size(); j++) { std::cerr << ", " << weights[j]; }
- std::cerr << "\n";
- // every score must have a weight! check that here:
- assert(weights.size() == staticData.GetScoreIndexManager().GetTotalNumberOfScores());
-
- if (inputOutput == NULL)
- return EXIT_FAILURE;
-
- // read each sentence & decode
- InputType *source=0;
- size_t lineCount = 0;
- while(readInput(inputOutput,staticData.GetInputType(),source))
- {
- // note: source is only valid within this while loop!
-
- TRACE_ERR("\nTRANSLATING(" << ++lineCount << "): " << *source <<endl);
-
- staticData.InitializeBeforeSentenceProcessing(*source);
- Manager manager(*source, staticData);
- manager.ProcessSentence();
- inputOutput->SetOutput(manager.GetBestHypothesis(), source->GetTranslationId(),
- staticData.GetReportSourceSpan(),
- staticData.GetReportAllFactors()
- );
-
- // n-best
- size_t nBestSize = staticData.GetNBestSize();
- if (nBestSize > 0)
- {
- TRACE_ERR("WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
- LatticePathList nBestList;
- manager.CalcNBest(nBestSize, nBestList,staticData.OnlyDistinctNBest());
- inputOutput->SetNBest(nBestList, source->GetTranslationId());
- RemoveAllInColl(nBestList);
- }
-
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
- }
-
- manager.CalcDecoderStatistics(staticData);
- staticData.CleanUpAfterSentenceProcessing();
- }
-
- delete inputOutput;
-
- timer.check("End.");
- return EXIT_SUCCESS;
-}
-
-InputOutput *GetInputOutput(StaticData &staticData)
-{
- InputOutput *inputOutput;
- const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
- ,&outputFactorOrder = staticData.GetOutputFactorOrder();
- FactorMask inputFactorUsed(inputFactorOrder);
-
- // io
- if (staticData.GetIOMethod() == IOMethodFile)
- {
- TRACE_ERR("IO from File" << endl);
- string inputFileHash;
- list< Phrase > inputPhraseList;
- string filePath = staticData.GetParam("input-file")[0];
-
- TRACE_ERR("About to create ioFile" << endl);
- IOFile *ioFile = new IOFile(inputFactorOrder, outputFactorOrder, inputFactorUsed
- , staticData.GetFactorCollection()
- , staticData.GetNBestSize()
- , staticData.GetNBestFilePath()
- , filePath);
- if(staticData.GetInputType())
- {
- TRACE_ERR("Do not read input phrases for confusion net translation\n");
- }
- else
- {
- TRACE_ERR("About to GetInputPhrase\n");
- ioFile->GetInputPhrase(inputPhraseList);
- }
- TRACE_ERR("After GetInputPhrase" << endl);
- inputOutput = ioFile;
- inputFileHash = GetMD5Hash(filePath);
- TRACE_ERR("About to LoadPhraseTables" << endl);
- staticData.LoadPhraseTables(true, inputFileHash, inputPhraseList);
- ioFile->ResetSentenceId();
- }
- else
- {
- TRACE_ERR("IO from STDOUT/STDIN" << endl);
- inputOutput = new IOCommandLine(inputFactorOrder, outputFactorOrder, inputFactorUsed
- , staticData.GetFactorCollection()
- , staticData.GetNBestSize()
- , staticData.GetNBestFilePath());
- staticData.LoadPhraseTables();
- }
- staticData.LoadMapping();
- timer.check("Created input-output object");
-
- return inputOutput;
-}
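
Main.cpp above is organised as a read-decode-output loop: each call to readInput produces the next Sentence or ConfusionNet, a Manager decodes it, and the best hypothesis (plus the optional n-best list) is written before per-sentence state is cleaned up. The sketch below keeps only that loop shape and replaces the Moses classes with a trivial stand-in so it compiles on its own; it is not the real Moses API.

    // Shape of the per-sentence driver loop, with a hypothetical stand-in decoder.
    #include <iostream>
    #include <string>

    // Stand-in for StaticData + Manager -- NOT the real Moses classes.
    struct StubDecoder {
        std::string Translate(const std::string& source) const {
            return "<translation of: " + source + ">";
        }
    };

    int main() {
        StubDecoder decoder;
        long lineCount = 0;
        for (std::string source; std::getline(std::cin, source); ) {
            std::cerr << "TRANSLATING(" << ++lineCount << "): " << source << "\n";
            std::cout << decoder.Translate(source) << std::endl;   // best hypothesis
            // n-best output and detailed translation reporting would follow here.
        }
        return 0;
    }
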
diff --git a/moses-cmd/src/Main.h b/moses-cmd/src/Main.h
deleted file mode 100644
index 1c3628533..000000000
--- a/moses-cmd/src/Main.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (c) 2006 University of Edinburgh
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the University of Edinburgh nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-***********************************************************************/
-
-// example file on how to use moses library
-
-#pragma once
-
-#include "StaticData.h"
-
-class InputOutput;
-
-int main(int argc, char* argv[]);
-InputOutput *GetInputOutput(StaticData &staticData);
diff --git a/moses-cmd/src/Makefile b/moses-cmd/src/Makefile
deleted file mode 100644
index f0438801a..000000000
--- a/moses-cmd/src/Makefile
+++ /dev/null
@@ -1,419 +0,0 @@
-# Makefile.in generated by automake 1.9.2 from Makefile.am.
-# src/Makefile. Generated from Makefile.in by configure.
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-
-
-SOURCES = $(moses_SOURCES)
-
-srcdir = .
-top_srcdir = ..
-
-pkgdatadir = $(datadir)/moses
-pkglibdir = $(libdir)/moses
-pkgincludedir = $(includedir)/moses
-top_builddir = ..
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-INSTALL = /usr/bin/install -c
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-bin_PROGRAMS = moses$(EXEEXT)
-#am__append_1 = IOMySQL.cpp ThreadMySQL.cpp
-subdir = src
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)"
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
-PROGRAMS = $(bin_PROGRAMS)
-am__moses_SOURCES_DIST = Main.cpp IOCommandLine.cpp IOFile.cpp \
- TranslationAnalysis.cpp IOMySQL.cpp ThreadMySQL.cpp
-#am__objects_1 = IOMySQL.$(OBJEXT) \
-# ThreadMySQL.$(OBJEXT)
-am_moses_OBJECTS = Main.$(OBJEXT) IOCommandLine.$(OBJEXT) \
- IOFile.$(OBJEXT) TranslationAnalysis.$(OBJEXT) \
- $(am__objects_1)
-moses_OBJECTS = $(am_moses_OBJECTS)
-DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
- -o $@
-SOURCES = $(moses_SOURCES)
-DIST_SOURCES = $(am__moses_SOURCES_DIST)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run aclocal-1.9
-AMDEP_FALSE = #
-AMDEP_TRUE =
-AMTAR = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run tar
-AUTOCONF = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run autoconf
-AUTOHEADER = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run autoheader
-AUTOMAKE = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run automake-1.9
-AWK = gawk
-BUILD_MYSQL_SUPPORT_FALSE =
-BUILD_MYSQL_SUPPORT_TRUE = #
-CPPFLAGS = -I/home/ws06/cdyer/boost-stage -I/home/ws06/cdyer/boost-stage/include -I/home/ws06/rzens/workspace/Aug11/moses/src -I/home/ws06/cdyer/srilm/include
-CXX = g++
-CXXCPP = g++ -E
-CXXDEPMODE = depmode=gcc3
-CXXFLAGS = -O3 -DNDEBUG
-CYGPATH_W = echo
-DEFS = -DHAVE_CONFIG_H
-DEPDIR = .deps
-ECHO_C =
-ECHO_N = -n
-ECHO_T =
-EGREP = grep -E
-EXEEXT =
-INSTALL_DATA = ${INSTALL} -m 644
-INSTALL_PROGRAM = ${INSTALL}
-INSTALL_SCRIPT = ${INSTALL}
-INSTALL_STRIP_PROGRAM = ${SHELL} $(install_sh) -c -s
-INTERNAL_LM_FALSE =
-INTERNAL_LM_TRUE = #
-IRST_LM_FALSE =
-IRST_LM_TRUE = #
-LDFLAGS = -static -L/home/ws06/cdyer/boost-stage/lib -L/home/ws06/cdyer/boost-stage/stage/lib -L/home/ws06/rzens/workspace/Aug11/moses/src -L/home/ws06/cdyer/srilm/lib/i686
-LIBOBJS =
-LIBS = -lmoses -loolm -ldstruct -lmisc -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz
-LTLIBOBJS =
-MAKEINFO = ${SHELL} /home/ws06/rzens/workspace/Aug11/moses-cmd/missing --run makeinfo
-MYSQLCLIENT_CPPFLAGS =
-MYSQLCLIENT_LDFLAGS =
-MYSQLCLIENT_LIBS =
-OBJEXT = o
-PACKAGE = moses
-PACKAGE_BUGREPORT =
-PACKAGE_NAME =
-PACKAGE_STRING =
-PACKAGE_TARNAME =
-PACKAGE_VERSION =
-PATH_SEPARATOR = :
-RANLIB = ranlib
-SET_MAKE =
-SHELL = /bin/sh
-SRI_LM_FALSE = #
-SRI_LM_TRUE =
-STRIP =
-VERSION = 0.1
-ac_ct_CXX = g++
-ac_ct_RANLIB = ranlib
-ac_ct_STRIP =
-am__fastdepCXX_FALSE = #
-am__fastdepCXX_TRUE =
-am__include = include
-am__leading_dot = .
-am__quote =
-am__tar = ${AMTAR} chof - "$$tardir"
-am__untar = ${AMTAR} xf -
-bindir = ${exec_prefix}/bin
-build_alias =
-datadir = ${prefix}/share
-exec_prefix = ${prefix}
-host_alias =
-includedir = ${prefix}/include
-infodir = ${prefix}/info
-install_sh = /home/ws06/rzens/workspace/Aug11/moses-cmd/install-sh
-libdir = ${exec_prefix}/lib
-libexecdir = ${exec_prefix}/libexec
-localstatedir = ${prefix}/var
-mandir = ${prefix}/man
-mkdir_p = mkdir -p --
-oldincludedir = /usr/include
-prefix = /usr/local
-program_transform_name = s,x,x,
-sbindir = ${exec_prefix}/sbin
-sharedstatedir = ${prefix}/com
-sysconfdir = ${prefix}/etc
-target_alias =
-moses_SOURCES = Main.cpp IOCommandLine.cpp IOFile.cpp \
- TranslationAnalysis.cpp $(am__append_1)
-AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
-moses_LDADD = -lmoses
-moses_DEPENDENCIES = $(top_srcdir)/../moses/src/libmoses.a
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .cpp .o .obj
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --foreign src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-install-binPROGRAMS: $(bin_PROGRAMS)
- @$(NORMAL_INSTALL)
- test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)"
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
- if test -f $$p \
- ; then \
- f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
- $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
- else :; fi; \
- done
-
-uninstall-binPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
- rm -f "$(DESTDIR)$(bindir)/$$f"; \
- done
-
-clean-binPROGRAMS:
- -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-moses$(EXEEXT): $(moses_OBJECTS) $(moses_DEPENDENCIES)
- @rm -f moses$(EXEEXT)
- $(CXXLINK) $(moses_LDFLAGS) $(moses_OBJECTS) $(moses_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-include ./$(DEPDIR)/IOCommandLine.Po
-include ./$(DEPDIR)/IOFile.Po
-include ./$(DEPDIR)/IOMySQL.Po
-include ./$(DEPDIR)/Main.Po
-include ./$(DEPDIR)/ThreadMySQL.Po
-include ./$(DEPDIR)/TranslationAnalysis.Po
-
-.cpp.o:
- if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
- then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
-# source='$<' object='$@' libtool=no \
-# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
-# $(CXXCOMPILE) -c -o $@ $<
-
-.cpp.obj:
- if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
- then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
-# source='$<' object='$@' libtool=no \
-# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
-# $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-uninstall-info-am:
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
- list='$(DISTFILES)'; for file in $$list; do \
- case $$file in \
- $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
- $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
- esac; \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test "$$dir" != "$$file" && test "$$dir" != "."; then \
- dir="/$$dir"; \
- $(mkdir_p) "$(distdir)$$dir"; \
- else \
- dir=''; \
- fi; \
- if test -d $$d/$$file; then \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(PROGRAMS)
-installdirs:
- for dir in "$(DESTDIR)$(bindir)"; do \
- test -z "$$dir" || $(mkdir_p) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
-
-distclean: distclean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-exec-am: install-binPROGRAMS
-
-install-info: install-info-am
-
-install-man:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS uninstall-info-am
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
- clean-generic ctags distclean distclean-compile \
- distclean-generic distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-binPROGRAMS \
- install-data install-data-am install-exec install-exec-am \
- install-info install-info-am install-man install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
- uninstall-am uninstall-binPROGRAMS uninstall-info-am
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/moses-cmd/src/Makefile.am b/moses-cmd/src/Makefile.am
deleted file mode 100644
index 24e2f0517..000000000
--- a/moses-cmd/src/Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
-bin_PROGRAMS = moses
-moses_SOURCES = Main.cpp IOCommandLine.cpp IOFile.cpp TranslationAnalysis.cpp
-AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
-
-moses_LDADD = -lmoses
-moses_DEPENDENCIES = $(top_srcdir)/../moses/src/libmoses.a
-
-if BUILD_MYSQL_SUPPORT
-moses_SOURCES += IOMySQL.cpp ThreadMySQL.cpp
-endif
-
diff --git a/moses-cmd/src/Makefile.in b/moses-cmd/src/Makefile.in
deleted file mode 100644
index 01aaa9e2e..000000000
--- a/moses-cmd/src/Makefile.in
+++ /dev/null
@@ -1,419 +0,0 @@
-# Makefile.in generated by automake 1.9.2 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-SOURCES = $(moses_SOURCES)
-
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-top_builddir = ..
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-INSTALL = @INSTALL@
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-bin_PROGRAMS = moses$(EXEEXT)
-@BUILD_MYSQL_SUPPORT_TRUE@am__append_1 = IOMySQL.cpp ThreadMySQL.cpp
-subdir = src
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)"
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
-PROGRAMS = $(bin_PROGRAMS)
-am__moses_SOURCES_DIST = Main.cpp IOCommandLine.cpp IOFile.cpp \
- TranslationAnalysis.cpp IOMySQL.cpp ThreadMySQL.cpp
-@BUILD_MYSQL_SUPPORT_TRUE@am__objects_1 = IOMySQL.$(OBJEXT) \
-@BUILD_MYSQL_SUPPORT_TRUE@ ThreadMySQL.$(OBJEXT)
-am_moses_OBJECTS = Main.$(OBJEXT) IOCommandLine.$(OBJEXT) \
- IOFile.$(OBJEXT) TranslationAnalysis.$(OBJEXT) \
- $(am__objects_1)
-moses_OBJECTS = $(am_moses_OBJECTS)
-DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
- -o $@
-SOURCES = $(moses_SOURCES)
-DIST_SOURCES = $(am__moses_SOURCES_DIST)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMDEP_FALSE = @AMDEP_FALSE@
-AMDEP_TRUE = @AMDEP_TRUE@
-AMTAR = @AMTAR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BUILD_MYSQL_SUPPORT_FALSE = @BUILD_MYSQL_SUPPORT_FALSE@
-BUILD_MYSQL_SUPPORT_TRUE = @BUILD_MYSQL_SUPPORT_TRUE@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-INTERNAL_LM_FALSE = @INTERNAL_LM_FALSE@
-INTERNAL_LM_TRUE = @INTERNAL_LM_TRUE@
-IRST_LM_FALSE = @IRST_LM_FALSE@
-IRST_LM_TRUE = @IRST_LM_TRUE@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LTLIBOBJS = @LTLIBOBJS@
-MAKEINFO = @MAKEINFO@
-MYSQLCLIENT_CPPFLAGS = @MYSQLCLIENT_CPPFLAGS@
-MYSQLCLIENT_LDFLAGS = @MYSQLCLIENT_LDFLAGS@
-MYSQLCLIENT_LIBS = @MYSQLCLIENT_LIBS@
-OBJEXT = @OBJEXT@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SRI_LM_FALSE = @SRI_LM_FALSE@
-SRI_LM_TRUE = @SRI_LM_TRUE@
-STRIP = @STRIP@
-VERSION = @VERSION@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_RANLIB = @ac_ct_RANLIB@
-ac_ct_STRIP = @ac_ct_STRIP@
-am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
-am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build_alias = @build_alias@
-datadir = @datadir@
-exec_prefix = @exec_prefix@
-host_alias = @host_alias@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-moses_SOURCES = Main.cpp IOCommandLine.cpp IOFile.cpp \
- TranslationAnalysis.cpp $(am__append_1)
-AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
-moses_LDADD = -lmoses
-moses_DEPENDENCIES = $(top_srcdir)/../moses/src/libmoses.a
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .cpp .o .obj
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --foreign src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-install-binPROGRAMS: $(bin_PROGRAMS)
- @$(NORMAL_INSTALL)
- test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)"
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
- if test -f $$p \
- ; then \
- f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
- $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
- else :; fi; \
- done
-
-uninstall-binPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
- rm -f "$(DESTDIR)$(bindir)/$$f"; \
- done
-
-clean-binPROGRAMS:
- -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-moses$(EXEEXT): $(moses_OBJECTS) $(moses_DEPENDENCIES)
- @rm -f moses$(EXEEXT)
- $(CXXLINK) $(moses_LDFLAGS) $(moses_OBJECTS) $(moses_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IOCommandLine.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IOFile.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IOMySQL.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Main.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ThreadMySQL.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TranslationAnalysis.Po@am__quote@
-
-.cpp.o:
-@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
-@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
-
-.cpp.obj:
-@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
-@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-uninstall-info-am:
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) ' { files[$$0] = 1; } \
- END { for (i in files) print i; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
- list='$(DISTFILES)'; for file in $$list; do \
- case $$file in \
- $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
- $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
- esac; \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test "$$dir" != "$$file" && test "$$dir" != "."; then \
- dir="/$$dir"; \
- $(mkdir_p) "$(distdir)$$dir"; \
- else \
- dir=''; \
- fi; \
- if test -d $$d/$$file; then \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(PROGRAMS)
-installdirs:
- for dir in "$(DESTDIR)$(bindir)"; do \
- test -z "$$dir" || $(mkdir_p) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
-
-distclean: distclean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-exec-am: install-binPROGRAMS
-
-install-info: install-info-am
-
-install-man:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS uninstall-info-am
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
- clean-generic ctags distclean distclean-compile \
- distclean-generic distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-binPROGRAMS \
- install-data install-data-am install-exec install-exec-am \
- install-info install-info-am install-man install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
- uninstall-am uninstall-binPROGRAMS uninstall-info-am
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/moses-cmd/src/TranslationAnalysis.cpp b/moses-cmd/src/TranslationAnalysis.cpp
deleted file mode 100644
index 0f623a8e1..000000000
--- a/moses-cmd/src/TranslationAnalysis.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// $Id$
-
-#include <iostream>
-#include <sstream>
-#include <algorithm>
-#include "StaticData.h"
-#include "Hypothesis.h"
-#include "TranslationAnalysis.h"
-
-namespace TranslationAnalysis {
-
-void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
-{
- os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
- std::vector<const Hypothesis*> translationPath;
- while (hypo) {
- translationPath.push_back(hypo);
- hypo = hypo->GetPrevHypo();
- }
- std::reverse(translationPath.begin(), translationPath.end());
-
- std::vector<std::string> droppedWords;
- std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
- ++tpi; // skip initial translation state
- std::vector<std::string> sourceMap;
- std::vector<std::string> targetMap;
- std::vector<unsigned int> lmAcc(0);
- size_t lmCalls = 0;
- bool doLMStats = ((*tpi)->_lmstats != 0);
- if (doLMStats)
- lmAcc.resize((*tpi)->_lmstats->size(), 0);
- for (; tpi != translationPath.end(); ++tpi) {
- std::ostringstream sms;
- std::ostringstream tms;
- std::string target = (*tpi)->GetTargetPhraseStringRep();
- std::string source = (*tpi)->GetSourcePhraseStringRep();
- WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
- WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
-
- // language model backoff stats,
- if (doLMStats) {
- std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->_lmstats;
- std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
- std::vector<unsigned int>::iterator acc = lmAcc.begin();
- // std::cerr << "\n";
- for (; i != lmstats.end(); ++i, ++acc) {
- std::vector<unsigned int>::iterator j = i->begin();
- lmCalls += i->size();
- // std::cerr << "lm: ";
- for (; j != i->end(); ++j) {
- // std::cerr << *j << " ";
- (*acc) += *j;
- }
- // std::cerr << " (total=" << *acc << ", lmcalls=" << lmCalls << ")" << std::endl;
- }
- }
-
- bool epsilon = false;
- if (target == "") {
- target="<EPSILON>";
- epsilon = true;
- droppedWords.push_back(source);
- }
- os << " SOURCE: " << swr << " " << source << std::endl
- << " TRANSLATED AS: " << target << std::endl;
- size_t twr_i = twr.GetStartPos();
- size_t swr_i = swr.GetStartPos();
- if (!epsilon) { sms << twr_i; }
- if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
- swr_i++; twr_i++;
- for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
- sms << '-' << twr_i;
- }
- for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
- tms << '-' << swr_i;
- }
- if (!epsilon) targetMap.push_back(sms.str());
- sourceMap.push_back(tms.str());
- }
- std::vector<std::string>::iterator si = sourceMap.begin();
- std::vector<std::string>::iterator ti = targetMap.begin();
- os << std::endl << "SOURCE/TARGET SPANS:";
- os << std::endl << " SOURCE:";
- for (; si != sourceMap.end(); ++si) {
- os << " " << *si;
- }
- os << std::endl << " TARGET:";
- for (; ti != targetMap.end(); ++ti) {
- os << " " << *ti;
- }
- os << std::endl << std::endl;
- if (doLMStats && lmCalls > 0) {
- std::vector<unsigned int>::iterator acc = lmAcc.begin();
- const LMList& lmlist = StaticData::Instance()->GetAllLM();
- LMList::const_iterator i = lmlist.begin();
- for (; acc != lmAcc.end(); ++acc, ++i) {
- char buf[256];
- sprintf(buf, "%.4f", (double)(*acc)/(double)lmCalls);
- os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
- }
- }
-
- if (droppedWords.size() > 0) {
- std::vector<std::string>::iterator dwi = droppedWords.begin();
- os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
- for (; dwi != droppedWords.end(); ++dwi) {
- os << "\tdropped=" << *dwi << std::endl;
- }
- }
- os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
- StaticData::Instance()->GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance()->GetAllWeights());
- os << std::endl;
-}
-
-}
diff --git a/moses-cmd/src/TranslationAnalysis.h b/moses-cmd/src/TranslationAnalysis.h
deleted file mode 100644
index d7ff181c1..000000000
--- a/moses-cmd/src/TranslationAnalysis.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// $Id$
-
-/*
- * also see moses/SentenceStats
- */
-
-#ifndef _TRANSLATION_ANALYSIS_H_
-#define _TRANSLATION_ANALYSIS_H_
-
-#include <iostream>
-
-class Hypothesis;
-
-namespace TranslationAnalysis
-{
-
-/***
- * print details about the translation represented in hypothesis to
- * os. Included information: phrase alignment, words dropped, scores
- */
-void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo);
-
-}
-
-#endif
diff --git a/moses/configure.in b/moses/configure.in
index c4faf83ec..f1af3eae7 100644
--- a/moses/configure.in
+++ b/moses/configure.in
@@ -69,7 +69,7 @@ then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
- AC_CHECK_HEADER(n_gram.h,
+ AC_CHECK_HEADER(ngram.h,
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
[AC_MSG_ERROR([Cannot find IRST-LM!])])
diff --git a/regression-testing/.project b/regression-testing/.project
deleted file mode 100644
index 2a8d86d2b..000000000
--- a/regression-testing/.project
+++ /dev/null
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>regression-testing</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- </buildSpec>
- <natures>
- </natures>
-</projectDescription>
diff --git a/regression-testing/MosesRegressionTesting.pm b/regression-testing/MosesRegressionTesting.pm
deleted file mode 100644
index c332a3dcc..000000000
--- a/regression-testing/MosesRegressionTesting.pm
+++ /dev/null
@@ -1,75 +0,0 @@
-package MosesRegressionTesting;
-
-use strict;
-
-# if your tests need a new version of the test data, increment this
-# and make sure that a moses-regression-tests-vX.Y is available for
-# download from statmt.org (redpony AT umd dot edu for more info)
-use constant TESTING_DATA_VERSION => '0.1';
-
-# find the data directory in a few likely locations and make sure
-# that it is the correct version
-sub find_data_directory
-{
- my ($test_script_root, $data_dir) = @_;
- my $data_version = TESTING_DATA_VERSION;
- my @ds = ();
- my $mrtp = "moses-reg-test-data-$data_version";
- push @ds, $data_dir if defined $data_dir;
- push @ds, "$test_script_root/$mrtp";
- push @ds, "/export/ws06osmt/regression-testing/$mrtp";
- push @ds, "/tmp/$mrtp";
- push @ds, "/var/tmp/$mrtp";
- foreach my $d (@ds) {
- next unless (-d $d);
- if (!-d "$d/models") {
- print STDERR "Found $d but it is malformed: missing subdir models/\n";
- next;
- }
- if (!-d "$d/lm") {
- print STDERR "Found $d but it is malformed: missing subdir lm/\n";
- next;
- }
- return $d;
- }
- print STDERR<<EOT;
-
-You do not appear to have the regression testing data installed. You may
-either specify a non-standard location when running the test suite with
-the --data-dir option, or you may install it in any one of the following
-standard locations: $test_script_root, /tmp, or /var/tmp with these
-commands:
-
- cd <DESIRED_INSTALLATION_DIRECTORY>
- wget http://www.statmt.org/moses/reg-testing/moses-regression-tests-v$data_version.tar
- tar xf moses-regression-tests-v$data_version.tar
- rm moses-regression-tests-v$data_version.tar
-
-EOT
- exit 1;
-}
-
-
-sub get_localized_moses_ini
-{
- use File::Temp;
- my ($moses_ini, $data_dir) = @_;
- my $LM_PATH = "$data_dir/lm";
- my $MODEL_PATH = "$data_dir/models";
- my $local_moses_ini = new File::Temp( UNLINK => 0, SUFFIX => '.ini' );
-
- open MI, "<$moses_ini" or die "Couldn't read $moses_ini";
- open MO, ">$local_moses_ini" or die "Couldn't open $local_moses_ini for writing";
- while (my $l = <MI>) {
- $l =~ s/\$\{LM_PATH\}/$LM_PATH/g;
- $l =~ s/\$\{MODEL_PATH\}/$MODEL_PATH/g;
- print $local_moses_ini $l;
- }
- close MO;
- close MI;
-
- return $local_moses_ini->filename;
-}
-
-1;
-
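For orientation, here is a minimal sketch of how a caller might drive this module; it mirrors what run-single-test.pl and run-test-suite (further down in this commit) actually do, and assumes MosesRegressionTesting.pm is on @INC and that the data directory has the models/ and lm/ layout checked above. The test path is a hypothetical example, not taken from the commit.

#!/usr/bin/perl -w
# Illustrative sketch only; not part of the deleted file.
use strict;
use MosesRegressionTesting;

# Resolve the shared test data; an explicit directory may be passed as the
# second argument and is tried first.
my $data_dir = MosesRegressionTesting::find_data_directory(".", $ARGV[0]);

# Rewrite the ${LM_PATH} / ${MODEL_PATH} placeholders of a test's moses.ini;
# the function returns the name of a temporary, localized config file.
my $ini = MosesRegressionTesting::get_localized_moses_ini(
    "tests/basic-surface-only/moses.ini", $data_dir);
print "data dir:  $data_dir\n";
print "local ini: $ini\n";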
diff --git a/regression-testing/compare-results.pl b/regression-testing/compare-results.pl
deleted file mode 100755
index a05bc7411..000000000
--- a/regression-testing/compare-results.pl
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-my ($results, $truth) = @ARGV;
-
-my ($report, $pass, $fail) = compare_results("$results/results.dat", "$truth/results.dat");
-open OUT, ">$results/Summary";
-print OUT $report;
-print $report;
-close OUT;
-
-if ($fail > 0) {
- print <<EOT;
-
-There were failures in this test run. Please analyze the results carefully.
-
-EOT
- exit 1;
-}
-exit 0;
-
-sub compare_results {
- my ($testf, $truthf) = @_;
- my $test = read_results($testf);
- my $truth = read_results($truthf);
- my $ct1 = delete $truth->{'COMPARISON_TYPE'};
- my $ct2 = delete $test->{'COMPARISON_TYPE'};
- my $pass = 0;
- my $fail = 0;
- my $report = '';
- foreach my $k (sort keys %$truth) {
- $report .= "test-name=$k\tresult=";
- if (!exists $test->{$k}) {
- $report .= "missing from test results\n";
- $fail++;
- next;
- }
- my $truthv = $truth->{$k} || '';
- my $testv = delete $test->{$k} || '';
- if ($ct1->{$k} eq '=') {
- if ($truthv eq $testv) {
- $report .= "pass\n";
- $pass++;
- } else {
- $report .= "fail\n\tTRUTH=$truthv\n\t TEST=$testv\n";
- $fail++;
- }
- } else { # numeric difference
- my $diff = $testv - $truthv;
- if ($diff == 0) { $report .= "identical\n"; next; }
- $report .= "BASELINE=$truthv, TEST=$testv\t DELTA=$diff";
- if ($truthv != 0) {
- my $pct = $diff/$truthv;
- my $t = sprintf "\t PCT CHANGE=%4.2f", $pct*100;
- $report .= $t;
- }
- $report .= "\n";
- }
- }
- foreach my $k (sort keys %$test) {
- $fail++;
- $report .= "test-name=$k\tfound in TEST but not in TRUTH.\n";
- }
- $report .= "\nTESTS PASSED=$pass\nTESTS FAILED=$fail\n";
- return $report, $pass, $fail;
-}
-
-sub read_results {
- my ($file) = @_;
- open IN, "<$file" or die "Could not open $file!";
- my %res;
- while (my $l = <IN>) {
- if ($l =~ /^([A-Za-z0-9_]+)\s*([=~])\s*(.+)$/) {
- my ($key, $comparison_type, $value) = ($1, $2, $3);
- $res{$key} = $value;
- $res{'COMPARISON_TYPE'}->{$key}=$comparison_type;
- }
- }
- close IN;
- return \%res;
-}
-
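The comparison above hinges on the two line shapes found in results.dat: a key recorded with '=' must match the baseline exactly, while a key recorded with '~' (the timing entries) is treated as numeric and only reported as a delta, never counted as a failure. A small sketch, reusing the same pattern as read_results, purely for illustration:

#!/usr/bin/perl -w
# Not part of the commit: shows which comparison type each line shape gets.
use strict;
for my $l ("SCORE_1 = -14.84", "TOTAL_WALLTIME ~ 23") {
    if ($l =~ /^([A-Za-z0-9_]+)\s*([=~])\s*(.+)$/) {
        my ($key, $type, $value) = ($1, $2, $3);
        printf "%-15s %-7s %s\n", $key, $value,
            $type eq '=' ? "(must match exactly)" : "(numeric, delta only)";
    }
}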
diff --git a/regression-testing/run-single-test.pl b/regression-testing/run-single-test.pl
deleted file mode 100755
index 0e9c84fe2..000000000
--- a/regression-testing/run-single-test.pl
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-use MosesRegressionTesting;
-use Getopt::Long;
-use File::Temp qw ( tempfile );
-use POSIX qw ( strftime );
-my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN );
-my ($decoder, $test_name);
-
-my $test_dir = "$script_dir/tests";
-my $data_dir;
-my $BIN_TEST = $script_dir;
-my $results_dir;
-
-GetOptions("decoder=s" => \$decoder,
- "test=s" => \$test_name,
- "data-dir=s"=> \$data_dir,
- "results-dir=s"=> \$results_dir,
- );
-
-die "Please specify a decoder with --decoder\n" unless $decoder;
-die "Please specify a test to run with --test\n" unless $test_name;
-
-die "Please specify the location of the data directory with --data-dir\n" unless $data_dir;
-
-die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
-
-$test_dir .= "/$test_name";
-die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
-
-#### get place to put results
-unless (defined $results_dir) { $results_dir = "$data_dir/results"; }
-if (!-d $results_dir) {
- print STDERR "[WARNING] Results directory not found.\n";
- mkdir ($results_dir) || die "Failed to create $results_dir";
-}
-$results_dir .= "/$test_name";
-if (!-d $results_dir) {
- print STDERR "[WARNING] Results directory for test=$test_name could not be found.\n";
- mkdir ($results_dir) || die "Failed to create $results_dir";
-}
-##########
-
-my $conf = "$test_dir/moses.ini";
-my $input = "$test_dir/to-translate";
-
-die "Cannot locate executable called $decoder\n" unless (-x $decoder);
-die "Cannot find $conf\n" unless (-f $conf);
-die "Cannot locate input at $input" unless (-f $input);
-
-my $local_moses_ini = MosesRegressionTesting::get_localized_moses_ini($conf, $data_dir);
-
-my $ts = get_timestamp($decoder);
-my $results = "$results_dir/$ts";
-mkdir($results) || die "Failed to create results directory: $results\n";
-
-my $truth = "$test_dir/truth";
-if (!-d $truth) {
- die "Could not find truth/ in $test_dir!\n";
-}
-
-print "RESULTS AVAILABLE IN: $results\n\n";
-
-my ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results);
-my $error = ($sig || $ec > 0);
-if ($error) {
- open OUT, ">$results/Summary";
- print STDERR "MOSES CRASHED.\n\texit_code=$ec\n\tsignal=$sig\n";
- print OUT "MOSES CRASHED.\n\texit_code=$ec\n\tsignal=$sig\n";
- print STDERR "FAILURE, for debugging, local moses.ini=$local_moses_ini\n";
- print OUT "FAILURE, for debugging, local moses.ini=$local_moses_ini\n";
- close OUT;
- exit 2 if $sig;
- exit 3;
-}
-
-($o, $ec, $sig) = run_command("$test_dir/filter-stdout $results/run.stdout > $results/results.dat");
-warn "filter-stdout failed!" if ($ec > 0 || $sig);
-($o, $ec, $sig) = run_command("$test_dir/filter-stderr $results/run.stderr >> $results/results.dat");
-warn "filter-stderr failed!" if ($ec > 0 || $sig);
-
-open OUT, ">>$results/results.dat";
-print OUT "TOTAL_WALLTIME ~ $elapsed\n";
-close OUT;
-
-run_command("gzip $results/run.stdout");
-run_command("gzip $results/run.stderr");
-
-($o, $ec, $sig) = run_command("$BIN_TEST/compare-results.pl $results $truth");
-print $o;
-if ($ec) {
- print STDERR "FAILURE, for debugging, local moses.ini=$local_moses_ini\n";
- exit 1;
-}
-
-unlink $local_moses_ini or warn "Couldn't remove $local_moses_ini\n";
-exit 0;
-
-sub exec_moses {
- my ($decoder, $conf, $input, $results) = @_;
- my $start_time = time;
- my ($o, $ec, $sig) = run_command("$decoder -f $conf -i $input 1> $results/run.stdout 2> $results/run.stderr");
- my $elapsed = time - $start_time;
- return ($o, $elapsed, $ec, $sig);
-}
-
-sub run_command {
- my ($cmd) = @_;
- my $o = `$cmd`;
- my $exit_code = $? >> 8;
-
- my $signal = $? & 127;
- my $core_dumped = $? & 128;
- if ($signal) { $signal = sig_name($signal); }
- return $o, $exit_code, $signal;
-}
-
-sub sig_name {
- my $sig = shift;
- return $SIGS[$sig];
-}
-
-sub get_timestamp {
- my ($file) = @_;
- my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,$blksize,$blocks)
- = stat($file);
- my $timestamp = strftime("%Y%m%d-%H%M%S", gmtime $mtime);
- my $timestamp2 = strftime("%Y%m%d-%H%M%S", gmtime);
- my $username = `whoami`; chomp $username;
- return "moses.v$timestamp-$username-at-$timestamp2";
-}
-
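For reference, this script is normally run once per test by run-test-suite below; the following is a hedged sketch of a direct invocation and of how its exit codes could be interpreted. The decoder and data paths are assumptions for illustration, not taken from this commit.

#!/usr/bin/perl -w
# Illustrative driver only; adjust the paths to your checkout.
use strict;
my $rc = system("./run-single-test.pl",
                "--decoder=../moses-cmd/src/moses",
                "--test=basic-surface-only",
                "--data-dir=/var/tmp/moses-reg-test-data-0.1");
my %meaning = (0 => "all comparisons passed",
               1 => "comparison failures (see the Summary file)",
               2 => "decoder died on a signal",
               3 => "decoder exited with a non-zero code");
my $code = $rc >> 8;
my $msg  = $meaning{$code} || "unexpected exit code $code";
print "$msg\n";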
diff --git a/regression-testing/run-test-suite b/regression-testing/run-test-suite
deleted file mode 100755
index 9b388b619..000000000
--- a/regression-testing/run-test-suite
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-use Getopt::Long;
-
-############################################################
-my @tests = qw (
- basic-surface-only
- ptable-filtering
- multi-factor
- multi-factor-drop
- confusionNet-surface-only
- basic-surface-binptable
- multi-factor-binptable
- );
-############################################################
-
-use MosesRegressionTesting;
-use File::Temp qw ( tempfile );
-use POSIX qw ( strftime );
-
-my $decoder;
-my $test_dir;
-my $BIN_TEST = $script_dir;
-my $data_dir;
-
-GetOptions("decoder=s" => \$decoder,
- "data-dir=s" => \$data_dir,
- );
-
-$data_dir = MosesRegressionTesting::find_data_directory($BIN_TEST, $data_dir);
-
-my $test_run = "$BIN_TEST/run-single-test.pl --data-dir=$data_dir";
-$test_dir = $script_dir . "/tests";
-$test_run .= " --test-dir=$test_dir" if $test_dir;
-
-print "Data directory: $data_dir\n";
-
-die "Please specify a decoder to test with --decoder\n" unless $decoder;
-
-die "Cannot locate executable called $decoder\n" unless (-x $decoder);
-
-$test_run .= " --decoder=$decoder";
-
-print "Running tests: @tests\n\n";
-
-print "TEST NAME STATUS PATH TO RESULTS\n";
-my $lb = "---------------------------------------------------------------------------------------------------------\n";
-print $lb;
-
-my $fail = 0;
-my @failed;
-foreach my $test (@tests) {
- my $cmd = "$test_run --test=$test";
- my ($res, $output, $results_path) = do_test($cmd);
- format STDOUT =
-@<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-$test, $res, $results_path
-.
- write;
- if ($res eq 'FAIL') {
- print "$lb$output$lb";
- $fail++;
- push @failed, $test;
- } else {
-# TOTAL_WALLTIME result=BASELINE=11, TEST=12 DELTA=1 PCT CHANGE=9.09
- if ($output =~ /TOTAL_WALLTIME\s+result\s*=\s*([^\n]+)/o) {
- print "\t\tTiming statistics: $1\n";
- }
- }
-}
-
-my $total = scalar @tests;
-my $fail_percentage = int(100 * $fail / $total);
-my $pass_percentage = int(100 * ($total-$fail) / $total);
-print "\n$pass_percentage% of the tests passed.\n";
-print "$fail_percentage% of the tests failed.\n";
-if ($fail_percentage>0) { print "\nPLEASE INVESTIGATE THESE FAILED TESTS: @failed\n"; }
-
-sub do_test {
- my ($test) = @_;
- my $o = `$test 2>&1`;
- my $res = 'PASS';
- $res = 'FAIL' if ($? > 0);
- my $od = '';
- if ($o =~ /RESULTS AVAILABLE IN: (.*)$/m) {
- $od = $1;
- $o =~ s/^RESULTS AVAIL.*$//mo;
- }
- return ($res, $o, $od);
-}
-
diff --git a/regression-testing/tests/basic-surface-binptable/filter-stderr b/regression-testing/tests/basic-surface-binptable/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/basic-surface-binptable/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/basic-surface-binptable/filter-stdout b/regression-testing/tests/basic-surface-binptable/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/basic-surface-binptable/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/basic-surface-binptable/moses.ini b/regression-testing/tests/basic-surface-binptable/moses.ini
deleted file mode 100644
index d51f2bd3d..000000000
--- a/regression-testing/tests/basic-surface-binptable/moses.ini
+++ /dev/null
@@ -1,52 +0,0 @@
-# Moses configuration file
-# automatic exodus from pharaoh.ini Wed Jul 12 18:24:14 EDT 2006
-
-###########################
-### PHARAOH CONFIG FILE ###
-###########################
-
-# phrase table f, n, p(n|f)
-[ttable-file]
-0 0 5 ${MODEL_PATH}/basic-surface-binptable/phrase-table.gz
-
-# language model
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-
-# limit on how many phrase translations e for each phrase f are loaded
-[ttable-limit]
-#ttable element load limit 0 = all elements loaded
-20
-
-# distortion (reordering) weight
-[weight-d]
-0.141806519223522
-
-# language model weight
-[weight-l]
-0.142658800199951
-
-# translation model weight (phrase translation, lexical weighting)
-[weight-t]
-0.00402447059454402
-0.0685647475075862
-0.294089113124688
-0.0328320356515851
--0.0426081987467227
-
-# word penalty
-[weight-w]
--0.273416114951401
-
-[distortion-limit]
-4
-
-[beam-threshold]
-0.03
-
-[input-factors]
-0
-
-[mapping]
-T 0
-
diff --git a/regression-testing/tests/basic-surface-binptable/to-translate b/regression-testing/tests/basic-surface-binptable/to-translate
deleted file mode 100644
index a505702d1..000000000
--- a/regression-testing/tests/basic-surface-binptable/to-translate
+++ /dev/null
@@ -1,5 +0,0 @@
-ich frage sie also , herr präsident : stellen die unterschiedlichen arbeitskosten somit nicht auch eine beschränkung des freien wettbewerbs in der europäischen union dar ?
-schaut man sich die fälligkeitspläne der ausführung des haushalts für die rubriken 2 , 3 , 4 und 7 an , stellt man fest , dass nur durchschnittlich 8 % aller verpflichtungen durch zahlungen gedeckt sind .
-vor drei jahren haben wir mit unserer beschäftigungsinitiative begonnen , indem wir kleinen und mittleren unternehmen halfen , chancenkapital zu bekommen .
-das parlament will das auf zweierlei weise tun .
-nur dann werden die europäischen institutionen auch ihrem auftrag gerecht .
diff --git a/regression-testing/tests/basic-surface-binptable/truth/results.dat b/regression-testing/tests/basic-surface-binptable/truth/results.dat
deleted file mode 100644
index 30b87afaa..000000000
--- a/regression-testing/tests/basic-surface-binptable/truth/results.dat
+++ /dev/null
@@ -1,13 +0,0 @@
-TRANSLATION_1=i ask you , therefore , mr president , the different labour costs are therefore not a restriction of free competition in the european union ?
-TRANSLATION_2=if we look at the fälligkeitspläne the implementation of the budget for the categories 2 , 3 , 4 and 7 to , we see that only an average of 8 % of commitments by payments are met .
-TRANSLATION_3=three years ago our employment strategy , we started by small and medium-sized enterprises halfen , chancenkapital to obtain .
-TRANSLATION_4=parliament wants the in two ways .
-TRANSLATION_5=only then will the european institutions to its mandate .
-LMLOAD_TIME ~ 10.00
-PTLOAD_TIME ~ 10.00
-SCORE_1 = -14.84
-SCORE_2 = -33.76
-SCORE_3 = -40.24
-SCORE_4 = -6.00
-SCORE_5 = -7.02
-TOTAL_WALLTIME ~ 23
diff --git a/regression-testing/tests/basic-surface-only/filter-stderr b/regression-testing/tests/basic-surface-only/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/basic-surface-only/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/basic-surface-only/filter-stdout b/regression-testing/tests/basic-surface-only/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/basic-surface-only/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/basic-surface-only/moses.ini b/regression-testing/tests/basic-surface-only/moses.ini
deleted file mode 100644
index 384954ece..000000000
--- a/regression-testing/tests/basic-surface-only/moses.ini
+++ /dev/null
@@ -1,45 +0,0 @@
-# moses.ini for regression test
-
-[ttable-file]
-0 0 5 ${MODEL_PATH}/basic-surface-only/phrase-table.gz
-
-# language model
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-# limit on how many phrase translations e for each phrase f are loaded
-[ttable-limit]
-#ttable element load limit 0 = all elements loaded
-20
-
-# distortion (reordering) weight
-[weight-d]
-0.141806519223522
-
-# language model weight
-[weight-l]
-0.142658800199951
-
-# translation model weight (phrase translation, lexical weighting)
-[weight-t]
-0.00402447059454402
-0.0685647475075862
-0.294089113124688
-0.0328320356515851
--0.0426081987467227
-
-# word penalty
-[weight-w]
--0.273416114951401
-
-[distortion-limit]
-4
-
-[beam-threshold]
-0.03
-
-[input-factors]
-0
-
-[mapping]
-T 0
-
diff --git a/regression-testing/tests/basic-surface-only/to-translate b/regression-testing/tests/basic-surface-only/to-translate
deleted file mode 100644
index a505702d1..000000000
--- a/regression-testing/tests/basic-surface-only/to-translate
+++ /dev/null
@@ -1,5 +0,0 @@
-ich frage sie also , herr präsident : stellen die unterschiedlichen arbeitskosten somit nicht auch eine beschränkung des freien wettbewerbs in der europäischen union dar ?
-schaut man sich die fälligkeitspläne der ausführung des haushalts für die rubriken 2 , 3 , 4 und 7 an , stellt man fest , dass nur durchschnittlich 8 % aller verpflichtungen durch zahlungen gedeckt sind .
-vor drei jahren haben wir mit unserer beschäftigungsinitiative begonnen , indem wir kleinen und mittleren unternehmen halfen , chancenkapital zu bekommen .
-das parlament will das auf zweierlei weise tun .
-nur dann werden die europäischen institutionen auch ihrem auftrag gerecht .
diff --git a/regression-testing/tests/basic-surface-only/truth/results.dat b/regression-testing/tests/basic-surface-only/truth/results.dat
deleted file mode 100644
index e41fb463f..000000000
--- a/regression-testing/tests/basic-surface-only/truth/results.dat
+++ /dev/null
@@ -1,13 +0,0 @@
-TRANSLATION_1=i ask you , therefore , mr president , the different labour costs are therefore not a restriction of free competition in the european union ?
-TRANSLATION_2=if we look at the fälligkeitspläne the implementation of the budget for the categories 2 , 3 , 4 and 7 to , we see that only an average of 8 % of commitments by payments are met .
-TRANSLATION_3=three years ago our employment strategy , we started by small and medium-sized enterprises halfen , chancenkapital to obtain .
-TRANSLATION_4=parliament wants the in two ways .
-TRANSLATION_5=only then will the european institutions to its mandate .
-LMLOAD_TIME ~ 8.00
-PTLOAD_TIME ~ 9.00
-SCORE_1 = -14.84
-SCORE_2 = -33.76
-SCORE_3 = -40.24
-SCORE_4 = -6.00
-SCORE_5 = -7.02
-TOTAL_WALLTIME ~ 28
diff --git a/regression-testing/tests/confusionNet-surface-only/filter-stderr b/regression-testing/tests/confusionNet-surface-only/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/confusionNet-surface-only/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/confusionNet-surface-only/filter-stdout b/regression-testing/tests/confusionNet-surface-only/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/confusionNet-surface-only/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/confusionNet-surface-only/moses.ini b/regression-testing/tests/confusionNet-surface-only/moses.ini
deleted file mode 100644
index e9db4481d..000000000
--- a/regression-testing/tests/confusionNet-surface-only/moses.ini
+++ /dev/null
@@ -1,56 +0,0 @@
-# Moses configuration file
-# automatic exodus from pharaoh.ini Wed Jul 12 18:24:14 EDT 2006
-
-###########################
-### PHARAOH CONFIG FILE ###
-###########################
-
-# phrase table f, n, p(n|f)
-[ttable-file]
-0 0 5 ${MODEL_PATH}/confusionNet-surface-only/phrase-table.0-0
-
-# language model
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-# limit on how many phrase translations e for each phrase f are loaded
-[ttable-limit]
-#ttable element load limit 0 = all elements loaded
-20
-
-# distortion (reordering) weight
-[weight-d]
-0.141806519223522
-
-# language model weight
-[weight-l]
-0.142658800199951
-
-# translation model weight (phrase translation, lexical weighting)
-[weight-t]
-0.00402447059454402
-0.0685647475075862
-0.294089113124688
-0.0328320356515851
--0.0426081987467227
-
-# word penalty
-[weight-w]
--0.273416114951401
-
-[distortion-limit]
-4
-
-[beam-threshold]
-0.03
-
-[input-factors]
-0
-
-[mapping]
-T 0
-
-[inputtype]
-1
-
-[weight-i]
-1.0
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate b/regression-testing/tests/confusionNet-surface-only/to-translate
deleted file mode 100644
index d08755644..000000000
--- a/regression-testing/tests/confusionNet-surface-only/to-translate
+++ /dev/null
@@ -1,15 +0,0 @@
-damit|PROADV 1.0
-ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0
-der|ART 1.0 die|ART 1.0 das|ART 1.0
-arbeitsplan|NN 1.0
-
-damit|PROADV 1.0 dies|PROADV 1.0
-ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0
-der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0
-arbeitsplan|NN 1.0
-
-damit|PROADV 1.0 dies|PROADV 0.0
-ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 dies|PROADV 0.0 das|DT 0.0
-der|ART 1.0 die|ART 1.0 das|ART 1.0 dies|PROADV 0.0
-arbeitsplan|NN 1.0 dies|PROADV 0.0
-
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate.new b/regression-testing/tests/confusionNet-surface-only/to-translate.new
deleted file mode 100644
index 008278bbb..000000000
--- a/regression-testing/tests/confusionNet-surface-only/to-translate.new
+++ /dev/null
@@ -1,10 +0,0 @@
-damit|PROADV 1.0
-ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0
-der|ART 1.0 die|ART 1.0 das|ART 1.0
-arbeitsplan|NN 1.0
-
-damit|PROADV 1.0 dies|PROADV 1.0
-ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0
-der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0
-arbeitsplan|NN 1.0
-
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate.orig b/regression-testing/tests/confusionNet-surface-only/to-translate.orig
deleted file mode 100644
index f3fda26da..000000000
--- a/regression-testing/tests/confusionNet-surface-only/to-translate.orig
+++ /dev/null
@@ -1,10 +0,0 @@
-damit|PROADV 0.0
-ist|VSFIN 0.0 war|VSFIN 0.0 sei|VSFIN 0.0
-der|ART 0.0 die|ART 0.0 das|ART 0.0
-arbeitsplan|NN 0.0
-
-damit|PROADV 0.0 dies|PROADV 0.0
-ist|VSFIN 0.0 war|VSFIN 0.0 sei|VSFIN 0.0 ist|VVFIN 0.0 war|VVFIN 0.0 sei|VVFIN 0.0
-der|ART 0.0 die|ART 0.0 das|ART 0.0 der|DT 0.0 die|DT 0.0 das|DT 0.0
-arbeitsplan|NN 0.0
-
diff --git a/regression-testing/tests/confusionNet-surface-only/truth/results.dat b/regression-testing/tests/confusionNet-surface-only/truth/results.dat
deleted file mode 100644
index fd8aac0c6..000000000
--- a/regression-testing/tests/confusionNet-surface-only/truth/results.dat
+++ /dev/null
@@ -1,9 +0,0 @@
-TRANSLATION_1=that is the order of business
-TRANSLATION_2=this is the order of business
-TRANSLATION_3=that is the order of business
-LMLOAD_TIME ~ 10.00
-PTLOAD_TIME ~ 10.00
-SCORE_1 = -2.97
-SCORE_2 = -2.56
-SCORE_3 = -2.97
-TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor-binptable/filter-stderr b/regression-testing/tests/multi-factor-binptable/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/multi-factor-binptable/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/multi-factor-binptable/filter-stdout b/regression-testing/tests/multi-factor-binptable/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/multi-factor-binptable/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/multi-factor-binptable/moses.ini b/regression-testing/tests/multi-factor-binptable/moses.ini
deleted file mode 100644
index db43d33b9..000000000
--- a/regression-testing/tests/multi-factor-binptable/moses.ini
+++ /dev/null
@@ -1,63 +0,0 @@
-#########################
-### MOSES CONFIG FILE ###
-#########################
-
-# input factors
-[input-factors]
-0
-1
-
-# mapping steps
-[mapping]
-T 0
-G 0
-T 1
-
-# translation tables: source-factors, target-factors, number of scores, file
-[ttable-file]
-0 0 5 ${MODEL_PATH}/multi-factor-binptable/phrase-table.0-0.gz
-1 1 5 ${MODEL_PATH}/multi-factor-binptable/phrase-table.1-1.gz
-
-# generation models: source-factors, target-factors
-[generation-file]
-0 1 2 ${MODEL_PATH}/multi-factor-binptable/generation.0-1.gz
-
-# language models: 0, factors, type, file
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-
-# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20
-0
-
-# distortion (reordering) weight
-[weight-d]
-0.6
-
-# language model weights
-[weight-l]
-0.5
-
-# translation model weights
-[weight-t]
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-
-# generation model weights
-[weight-generation]
-0.3
-0
-
-# word penalty
-[weight-w]
--1
diff --git a/regression-testing/tests/multi-factor-binptable/to-translate b/regression-testing/tests/multi-factor-binptable/to-translate
deleted file mode 100644
index b3283a754..000000000
--- a/regression-testing/tests/multi-factor-binptable/to-translate
+++ /dev/null
@@ -1 +0,0 @@
-es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor-binptable/truth/results.dat b/regression-testing/tests/multi-factor-binptable/truth/results.dat
deleted file mode 100644
index d7637123d..000000000
--- a/regression-testing/tests/multi-factor-binptable/truth/results.dat
+++ /dev/null
@@ -1,5 +0,0 @@
-TRANSLATION_1=there are various different opinions .
-LMLOAD_TIME ~ 10.00
-PTLOAD_TIME ~ 10.00
-SCORE_1 = -13.73
-TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl b/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
deleted file mode 100755
index e764eec84..000000000
--- a/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/perl
-
-#add_empties_to_phrase_table: go through an old-style pharaoh phrase table (one with no empty-target entries) and add one such empty-target line for each single-word source phrase in the table,
-#complete with factors (note the number and type of factors are hardcoded here);
-#also add deletion-cost factors as necessary to all lines
-#Evan Herbst 7 / 11 / 06
-
-#usage: aetpt INPUT_PTABLE OUTPUT_PTABLE
-
-my ($inputFile, $outputFile) = @ARGV;
-my $DELETIONCOST = 2.718; #weight for an individual deletion
-
-open(INFILE, "<$inputFile") or die "couldn't open '$inputFile' for read\n";
-open(OUTFILE, ">$outputFile") or die "couldn't open '$outputFile' for write\n";
-my ($lastSrcPhrase, $srcPhrase);
-while(my $line = <INFILE>)
-{
- chop($line);
- $lastSrcPhrase = $srcPhrase;
- my @tokens = split(/\|\|\|/, $line);
- $srcPhrase = $tokens[0];
- if($srcPhrase ne $lastSrcPhrase && $srcPhrase =~ /^\s*\S+\s*$/) #new source phrase of a single word; add deletion line
- {
- print OUTFILE "$srcPhrase ||| ||| 1 1 1 1 2.718 $DELETIONCOST\n";
- }
- print OUTFILE "$line 1\n";
-}
-close(INFILE);
-close(OUTFILE);
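To make the transformation concrete (illustrative values only): given an old-style five-score line such as "haus ||| house ||| 0.8 0.6 0.7 0.5 2.718", the script emits, for the first entry of each single-word source phrase, a synthetic empty-target line roughly of the form "haus ||| ||| 1 1 1 1 2.718 2.718" (the final field being the deletion cost), and then every original line with " 1" appended. Each entry therefore ends up carrying six scores, which is consistent with the six-score [ttable-file] entry of the multi-factor-drop moses.ini later in this commit.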
diff --git a/regression-testing/tests/multi-factor-drop/filter-stderr b/regression-testing/tests/multi-factor-drop/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/multi-factor-drop/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/multi-factor-drop/filter-stdout b/regression-testing/tests/multi-factor-drop/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/multi-factor-drop/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/multi-factor-drop/moses.ini b/regression-testing/tests/multi-factor-drop/moses.ini
deleted file mode 100644
index 526d59ee0..000000000
--- a/regression-testing/tests/multi-factor-drop/moses.ini
+++ /dev/null
@@ -1,71 +0,0 @@
-#########################
-### MOSES CONFIG FILE ###
-#########################
-
-# input factors
-[input-factors]
-0
-1
-
-# mapping steps
-[mapping]
-T 0
-G 0
-T 1
-
-# translation tables: source-factors, target-factors, number of scores, file
-[ttable-file]
-0 0 6 ${MODEL_PATH}/multi-factor-drop/phrase-table.0-0.gz
-1 1 5 ${MODEL_PATH}/multi-factor-drop/phrase-table.1-1.gz
-
-# generation models: source-factors, target-factors
-[generation-file]
-0 1 ${MODEL_PATH}/multi-factor-drop/generation.0-1.gz
-
-# language models: 0, factors, type, file
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-
-# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20
-0
-
-# distortion (reordering) weight
-[weight-d]
-0.6
-
-# language model weights
-[weight-l]
-0.5
-
-# translation model weights
-[weight-t]
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-
-# generation model weights
-[weight-generation]
-0.3
-0
-
-# word penalty
-[weight-w]
--1
-
-[phrase-drop-allowed]
-yes
-
-##word-drop penalty
-#[weight-e]
-#2.718
diff --git a/regression-testing/tests/multi-factor-drop/to-translate b/regression-testing/tests/multi-factor-drop/to-translate
deleted file mode 100644
index b3283a754..000000000
--- a/regression-testing/tests/multi-factor-drop/to-translate
+++ /dev/null
@@ -1 +0,0 @@
-es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor-drop/truth/results.dat b/regression-testing/tests/multi-factor-drop/truth/results.dat
deleted file mode 100644
index 00928862a..000000000
--- a/regression-testing/tests/multi-factor-drop/truth/results.dat
+++ /dev/null
@@ -1,5 +0,0 @@
-TRANSLATION_1=there are different opinions
-LMLOAD_TIME ~ 10.00
-PTLOAD_TIME ~ 11.00
-SCORE_1 = -12.67
-TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor/filter-stderr b/regression-testing/tests/multi-factor/filter-stderr
deleted file mode 100755
index 58b4ce9ed..000000000
--- a/regression-testing/tests/multi-factor/filter-stderr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl
-
-BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
-use RegTestUtils;
-
-$x=0;
-while (<>) {
- chomp;
-
- if (/^Finished loading LanguageModels/) {
- my $time = RegTestUtils::readTime($_);
- print "LMLOAD_TIME ~ $time\n";
- }
- if (/^Finished loading phrase tables/) {
- my $time = RegTestUtils::readTime($_);
- print "PTLOAD_TIME ~ $time\n";
- }
- next unless /^BEST HYPO:/;
- my $pscore = RegTestUtils::readHypoScore($_);
- $x++;
- print "SCORE_$x = $pscore\n";
-}
diff --git a/regression-testing/tests/multi-factor/filter-stdout b/regression-testing/tests/multi-factor/filter-stdout
deleted file mode 100755
index 476ddf6e9..000000000
--- a/regression-testing/tests/multi-factor/filter-stdout
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
- $x++;
- print "TRANSLATION_$x=$_\n";
-}
diff --git a/regression-testing/tests/multi-factor/moses.ini b/regression-testing/tests/multi-factor/moses.ini
deleted file mode 100644
index e8611a2cf..000000000
--- a/regression-testing/tests/multi-factor/moses.ini
+++ /dev/null
@@ -1,63 +0,0 @@
-#########################
-### MOSES CONFIG FILE ###
-#########################
-
-# input factors
-[input-factors]
-0
-1
-
-# mapping steps
-[mapping]
-T 0
-G 0
-T 1
-
-# translation tables: source-factors, target-factors, number of scores, file
-[ttable-file]
-0 0 5 ${MODEL_PATH}/multi-factor/phrase-table.0-0.gz
-1 1 5 ${MODEL_PATH}/multi-factor/phrase-table.1-1.gz
-
-# generation models: source-factors, target-factors
-[generation-file]
-0 1 2 ${MODEL_PATH}/multi-factor/generation.0-1.gz
-
-# language models: 0, factors, type, file
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-
-# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20
-0
-
-# distortion (reordering) weight
-[weight-d]
-0.6
-
-# language model weights
-[weight-l]
-0.5
-
-# translation model weights
-[weight-t]
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-
-# generation model weights
-[weight-generation]
-0.3
-0
-
-# word penalty
-[weight-w]
--1
diff --git a/regression-testing/tests/multi-factor/moses2.ini b/regression-testing/tests/multi-factor/moses2.ini
deleted file mode 100644
index a98e71483..000000000
--- a/regression-testing/tests/multi-factor/moses2.ini
+++ /dev/null
@@ -1,63 +0,0 @@
-#########################
-### MOSES CONFIG FILE ###
-#########################
-
-# input factors
-[input-factors]
-0
-1
-
-# mapping steps
-[mapping]
-T 0
-G 0
-T 1
-
-# translation tables: source-factors, target-factors, number of scores, file
-[ttable-file]
-0 0 5 /export/ws06osmt/regression-testing/models/multi-factor/phrase-table.0-0.gz
-1 1 5 /export/ws06osmt/regression-testing/models/multi-factor/phrase-table.1-1.gz
-
-# generation models: source-factors, target-factors
-[generation-file]
-0 1 2 /export/ws06osmt/regression-testing/models/multi-factor/generation.0-1.gz
-
-# language models: 0, factors, type, file
-[lmodel-file]
-0 0 3 /export/ws06osmt/models/lm/europarl.en.srilm.gz
-
-# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20
-0
-
-# distortion (reordering) weight
-[weight-d]
-0.6
-
-# language model weights
-[weight-l]
-0.5
-
-# translation model weights
-[weight-t]
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-0.2
-
-# generation model weights
-[weight-generation]
-0.3
-0
-
-# word penalty
-[weight-w]
--1
diff --git a/regression-testing/tests/multi-factor/to-translate b/regression-testing/tests/multi-factor/to-translate
deleted file mode 100644
index b3283a754..000000000
--- a/regression-testing/tests/multi-factor/to-translate
+++ /dev/null
@@ -1 +0,0 @@
-es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor/truth/results.dat b/regression-testing/tests/multi-factor/truth/results.dat
deleted file mode 100644
index 3f07889b2..000000000
--- a/regression-testing/tests/multi-factor/truth/results.dat
+++ /dev/null
@@ -1,5 +0,0 @@
-TRANSLATION_1=there are various different opinions .
-LMLOAD_TIME ~ 10.00
-PTLOAD_TIME ~ 11.00
-SCORE_1 = -13.73
-TOTAL_WALLTIME ~ 12
diff --git a/regression-testing/tests/perllib/RegTestUtils.pm b/regression-testing/tests/perllib/RegTestUtils.pm
deleted file mode 100644
index 25e7f5e53..000000000
--- a/regression-testing/tests/perllib/RegTestUtils.pm
+++ /dev/null
@@ -1,31 +0,0 @@
-#RegTestUtils.pm: for moses regression testing
-#Evan Herbst, 8 / 11 / 06
-
-use strict;
-
-package RegTestUtils;
-return 1;
-
-###############################################################
-
-#arguments: chomped line of output that gives the best hypo and various scores
-#return: a string to be compared with the correct total hypothesis score;
-# it's formatted as a double if no error, or "FORMAT ERROR" if there is one
-sub readHypoScore
-{
- my $line = shift;
- #the 0.12 is hardcoded in Hypothesis.cpp because some parsing scripts still
- #expect a comma-separated list of scores -- EVH
- if($line =~ /\[total=\s*(-?\d+\.\d+)\]/) {return $1;}
- return "FORMAT ERROR";
-}
-
-#arguments: chomped line of output that gives a time in seconds
-#return: a string to be compared with the correct time;
-# it's formatted as a double if no error, or "FORMAT ERROR" if there is one
-sub readTime
-{
- my $line = shift;
- if($line =~ /\[(\d+\.\d+)\]\s*seconds$/) {return $1;}
- return "FORMAT ERROR";
-}
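RegTestUtils.pm above exposes two small helpers: readHypoScore() pulls the total model score out of a "BEST HYPO:" line via its [total= ...] field, and readTime() pulls the seconds value out of a "[N.NN] seconds" timing line; both return "FORMAT ERROR" when the pattern is absent. A usage sketch follows; the two sample strings are invented so that they match those regexes and are not verbatim decoder output, and the library path is an assumption:

    #!/usr/bin/perl
    # Illustrative use of the helpers defined in RegTestUtils.pm above.
    use strict;
    use warnings;
    use lib 'regression-testing/tests/perllib';   # assumed location of the module
    use RegTestUtils;

    my $hypo_line = 'BEST HYPO: there are different opinions . [total=-12.67]';
    my $time_line = 'Finished loading LanguageModels : [10.00] seconds';

    print 'score: ', RegTestUtils::readHypoScore($hypo_line), "\n";   # prints -12.67
    print 'time:  ', RegTestUtils::readTime($time_line), "\n";        # prints 10.00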
diff --git a/regression-testing/tests/ptable-filtering/filter-stderr b/regression-testing/tests/ptable-filtering/filter-stderr
deleted file mode 100755
index 4b359127e..000000000
--- a/regression-testing/tests/ptable-filtering/filter-stderr
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/perl
-$x=0;
-while (<>) {
- chomp;
-
- if (/^\[.* ; 2-2\]$/o) {
- my @lines;
- my $done = 0;
- while (!$done) {
- $x = <>;
- if ($x =~ /^\s*$/o) { $done = 1; } else {
- chomp $x;
- $x =~ s/^\s+//o;
- push @lines, $x;
- }
- }
- my $c = 0;
- foreach my $x (sort @lines) {
- $c++;
- print "TRANSLATION_OPTION_$c=$x\n";
- }
- }
-
- next unless /^BEST HYPO:/;
- s/^BEST HYPO:\s*//;
- s/\s*\[111+.*$//;
- $x++;
- print "TRANSLATION_$x = $_\n";
-}
diff --git a/regression-testing/tests/ptable-filtering/filter-stdout b/regression-testing/tests/ptable-filtering/filter-stdout
deleted file mode 100755
index a0421ef93..000000000
--- a/regression-testing/tests/ptable-filtering/filter-stdout
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/perl
-$x=0;
diff --git a/regression-testing/tests/ptable-filtering/moses.ini b/regression-testing/tests/ptable-filtering/moses.ini
deleted file mode 100644
index 360c4389f..000000000
--- a/regression-testing/tests/ptable-filtering/moses.ini
+++ /dev/null
@@ -1,59 +0,0 @@
-# reg testing
-
-[ttable-file]
-0 0 6 ${MODEL_PATH}/ptable-filtering/phrase-table.gz
-
-# language model
-[lmodel-file]
-0 0 3 ${LM_PATH}/europarl.en.srilm.gz
-# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20
-
-# distortion (reordering) type
-[distortion-type]
-distance
-
-# distortion (reordering) weight
-[weight-d]
-0.01
-
-# language model weight
-[weight-l]
-0.05
-
-# translation model weight (phrase translation, lexical weighting)
-[weight-t]
-0.019268
-0.025749
-0.018038
-0.008511
-0.013410
--0.624610
-
-# word penalty
-[weight-w]
--0.108496
-
-[distortion-limit]
-6
-
-[stack]
-100
-
-[lmodel-floor]
--50
-
-[beam-threshold]
-0.03
-
-[input-factors]
-0
-
-[mapping]
-T 0
-
-[verbose]
-3
-
diff --git a/regression-testing/tests/ptable-filtering/to-translate b/regression-testing/tests/ptable-filtering/to-translate
deleted file mode 100644
index 3ece67fa6..000000000
--- a/regression-testing/tests/ptable-filtering/to-translate
+++ /dev/null
@@ -1 +0,0 @@
-西 联 汇款 .
diff --git a/regression-testing/tests/ptable-filtering/truth/results.dat b/regression-testing/tests/ptable-filtering/truth/results.dat
deleted file mode 100644
index 8ad5240a1..000000000
--- a/regression-testing/tests/ptable-filtering/truth/results.dat
+++ /dev/null
@@ -1,22 +0,0 @@
-TRANSLATION_OPTION_1=form of , pC=-0.57, c=-0.81
-TRANSLATION_OPTION_2=from , pC=-0.60, c=-0.78
-TRANSLATION_OPTION_3=money , pC=-0.38, c=-0.68
-TRANSLATION_OPTION_4=money transfers , pC=-0.29, c=-0.80
-TRANSLATION_OPTION_5=of transfers , pC=-0.32, c=-0.80
-TRANSLATION_OPTION_6=payments , pC=-0.49, c=-0.82
-TRANSLATION_OPTION_7=providing , pC=-0.54, c=-0.87
-TRANSLATION_OPTION_8=remittance , pC=-0.06, c=-0.63
-TRANSLATION_OPTION_9=remittance of , pC=-0.15, c=-0.73
-TRANSLATION_OPTION_10=remittances , , pC=-0.20, c=-0.79
-TRANSLATION_OPTION_11=remittances , pC=-0.04, c=-0.61
-TRANSLATION_OPTION_12=remittances , to , pC=-0.16, c=-0.86
-TRANSLATION_OPTION_13=remittances from , pC=-0.15, c=-0.75
-TRANSLATION_OPTION_14=represents , pC=-0.55, c=-0.85
-TRANSLATION_OPTION_15=sending money , pC=-0.33, c=-0.84
-TRANSLATION_OPTION_16=sent , pC=-0.49, c=-0.83
-TRANSLATION_OPTION_17=transfer , pC=-0.37, c=-0.72
-TRANSLATION_OPTION_18=transfer of money , pC=-0.35, c=-0.80
-TRANSLATION_OPTION_19=transfers , pC=-0.33, c=-0.69
-TRANSLATION_OPTION_20=transfers from , pC=-0.36, c=-0.76
-TRANSLATION_1 = from the west .
-TOTAL_WALLTIME ~ 14
diff --git a/scripts/.cvsignore b/scripts/.cvsignore
deleted file mode 100644
index 0d20b6487..000000000
--- a/scripts/.cvsignore
+++ /dev/null
@@ -1 +0,0 @@
-*.pyc
diff --git a/scripts/Makefile b/scripts/Makefile
deleted file mode 100644
index 8b15f858a..000000000
--- a/scripts/Makefile
+++ /dev/null
@@ -1,147 +0,0 @@
-# This makefile is here to simplify the automatic releases (and tests!!!)
-# of the scripts
-
-
-TS?=$(shell date '+%Y%m%d-%H%M')
-DS?=$(shell date '+%Y%m%d')
-
-# to simplify redirect to custom releases
-DEFAULTTARGETDIR?=/export/ws06osmt/bin
-
-MAIN_SCRIPTS_TARGET_DIR=$(TARGETDIR)
-# MAIN_SCRIPTS_TARGET_DIR=$(shell echo `pwd`/temp)
-
-RELEASEDIR=$(TARGETDIR)/scripts-$(TS)
-# RELEASEDIR=$(shell echo `pwd`/temp)
-
-
-
-## Rules to compile parts that need compilation
-
-
-all: compile
-
-compile:
- cd training/cmert-0.5/ && make
- ## Compiled mert
- cd training/phrase-extract/ && make
- ## Compiled phrase_extract
- cd training/symal/ && make
- ## Compiled symal
- ## All files that need compilation were compiled
-
-
-### "MAIN" scripts are scripts that have a Philipp-like name, too
-## That is for each script (listed below in MAIN_SCRIPTS),
-## we create a date-stamped version in MAIN_SCRIPTS_TARGET_DIR
-
-MAIN_TRAINING_SCRIPTS_NAMES=filter-model-given-input.pl mert-moses.pl train-factored-phrase-model.perl clean-corpus-n.perl
-# Make trick to add directory name to all of them:
-MAIN_TRAINING_SCRIPTS=$(MAIN_TRAINING_SCRIPTS_NAMES:%=training/%)
-
-MAIN_GENERIC_SCRIPTS_NAMES= moses-parallel.pl
-# Make trick to add directory name to all of them:
-MAIN_GENERIC_SCRIPTS=$(MAIN_GENERIC_SCRIPTS_NAMES:%=generic/%)
-
-# the list of all scripts that should be released
-MAIN_SCRIPTS= $(MAIN_TRAINING_SCRIPTS) $(MAIN_GENERIC_SCRIPTS)
-
-
-release:
- # Compile the parts
- make all
- if [ -z "$(TARGETDIR)" ]; then \
- echo "Please specify a TARGETDIR." ; \
- echo " For custom releases issue: "; \
- echo " TARGETDIR=$(HOME)/releases make release"; \
-	  echo "  For official releases (that include generation of helper scripts directly in /bin) issue: "; \
- echo " TARGETDIR=/export/ws06osmt make jhu_release"; \
- exit 1; fi
- if [ -e $(RELEASEDIR) ]; then echo "Targetdir exists! Not touching it! $(RELEASEDIR)"; exit 1; fi
- mkdir -p $(RELEASEDIR)
- rsync -r --files-from ./released-files . $(RELEASEDIR)/
- echo "####### Do not forget to:" >> $(RELEASEDIR)/README
- echo " export SCRIPTS_ROOTDIR=$(RELEASEDIR)" >> $(RELEASEDIR)/README
- ## Remember, only files listed in released-files are released!!
- ## URGE yourself to:
- @echo " export SCRIPTS_ROOTDIR=$(RELEASEDIR)"
-
-jhu_release:
- TS=$(TS) DS=$(DS) TARGETDIR=$(DEFAULTTARGETDIR) make release
- TS=$(TS) DS=$(DS) TARGETDIR=$(DEFAULTTARGETDIR) make generate_wrappers
- ## Remember, only files listed in released-files are released!!
- ### Release succeeded, tagging the CVS
- cvs tag SCRIPTS-RELEASE-$(TS)
-
-generate_wrappers:
- ## And for each script, create/rewrite the daily release
- export TARGET
- for s in $(MAIN_SCRIPTS); do \
- bn=`basename $$s`; \
- echo '#!/bin/bash' > $(MAIN_SCRIPTS_TARGET_DIR)/$$bn-$(DS) || exit 1; \
- echo "export SCRIPTS_ROOTDIR=$(RELEASEDIR); $(RELEASEDIR)/$$s "'"$$@"; exit $$?' >> $(MAIN_SCRIPTS_TARGET_DIR)/$$bn-$(DS) || exit 1; \
- chmod 775 $(MAIN_SCRIPTS_TARGET_DIR)/$$bn-$(DS); \
- done
-
-
-MOSESRELEASE=$(TARGETDIR)/moses.$(DS)
-## This is a handy goal to release moses binary, too
-releasemoses:
- if [ -z "$(TARGETDIR)" ]; then \
- echo "Please specify a TARGETDIR." ; \
- echo " For custom releases issue: "; \
- echo " TARGETDIR=$(HOME)/releases make releasemoses"; \
- echo " For official releases: "; \
- echo " TARGETDIR=/export/ws06osmt make releasemoses"; \
- exit 1; \
- fi
- if [ -e $(MOSESRELEASE) ]; then echo "Moses release exists! Not touching it! $(MOSESRELEASE)"; exit 1; fi
-	if [ ! -e ../moses-cmd/src/moses ]; then echo "Moses (../moses-cmd/src/moses) does not exist, nothing to release"; exit 1; fi
-	if file ../moses-cmd/src/moses | grep -q 'dynamicall' ; then echo "Moses (../moses-cmd/src/moses) is dynamically linked, not releasing."; exit 1; fi
- cp ../moses-cmd/src/moses $(MOSESRELEASE)
- ## Your current version of moses:
- @echo " $(MOSESRELEASE)"
-
-
-## This goal lists all files you might have wanted to release
-# but forgot to mention in released-files
-missed:
- ### These might be intended for release
- find . -type f \
- | grep -v '/CVS/' \
- | grep -v /tests/ \
- | sed 's/^\.\///' \
- | grep -F -x -v -f released-files
-
-
-
-### Tests, applicable only at JHU environment due to data dependencies
-export WORKSPACE=$(shell pwd)/../
-
-.PHONY: tests
-tests:
- export SCRIPTS_ROOTDIR=`pwd`; \
- cd tests; \
- ts=`date '+%Y%m%d-%H%M%N'`; \
- for test in *.test; do \
- mkdir $$test.$$ts; \
- cd $$test.$$ts; \
- echo "Running $$test in tests/$$test.$$ts"; \
- ../$$test > log 2>&1 || exit 1; \
- cd ..; \
- done
- ## All tests passed
-
-## Run just one test
-tests/%.test.run: tests/%.test
- export SCRIPTS_ROOTDIR=`pwd`; \
- ts=`date '+%Y%m%d-%H%M%N'`; \
- cd tests; \
- test=$*.test; \
- mkdir $$test.$$ts; \
- cd $$test.$$ts; \
- echo "Running $$test in tests/$$test.$$ts"; \
- ( nohup ../$$test > log 2>&1 & ) || exit 1; \
- echo "Observe tests/$$test.$$ts/log"; \
- cd ..
-
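The generate_wrappers target in the Makefile above writes, for each main script, a date-stamped wrapper in the target directory that pins SCRIPTS_ROOTDIR to the release directory and forwards all arguments to the released copy. A minimal Perl sketch of that idea is shown below; the two directory paths are placeholders, and the script list merely echoes two of the names from the Makefile:

    #!/usr/bin/perl
    # Hypothetical sketch mirroring the wrapper generation above: emit a small
    # bash wrapper per released script that exports SCRIPTS_ROOTDIR and then
    # delegates to the released copy, preserving arguments and exit status.
    use strict;
    use warnings;
    use POSIX qw(strftime);
    use File::Basename qw(basename);

    my $releasedir = '/path/to/releases/scripts-TIMESTAMP';   # placeholder
    my $targetdir  = '/path/to/bin';                          # placeholder
    my $ds         = strftime('%Y%m%d', localtime);
    my @scripts    = qw(training/mert-moses.pl generic/moses-parallel.pl);

    for my $script (@scripts) {
        my $wrapper = "$targetdir/" . basename($script) . "-$ds";
        open my $out, '>', $wrapper or die "cannot write $wrapper: $!\n";
        print {$out} "#!/bin/bash\n",
            "export SCRIPTS_ROOTDIR=$releasedir; $releasedir/$script \"\$\@\"; exit \$?\n";
        close $out or die "close failed: $!\n";
        chmod 0775, $wrapper;
    }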
diff --git a/scripts/README b/scripts/README
deleted file mode 100644
index 609352bcc..000000000
--- a/scripts/README
+++ /dev/null
@@ -1,15 +0,0 @@
-2006-07-29
-
-This directory should contain all multi-purpose scripts for:
-
-- training ... training moses (including BLEU evaluation needed for MERT)
-- analysis ... analyzing MT output (for human analysis)
-- generic ... script for handling generic issues (parallelization)
-- lib ... perl modules used by various scripts
-
-
-The Makefile then takes care of proper 'release' from your CVS directory to
-the shared directories.
-
-The released scripts should remain in the *same directory structure*.
-
diff --git a/scripts/analysis/README b/scripts/analysis/README
deleted file mode 100644
index 6e28bc9e0..000000000
--- a/scripts/analysis/README
+++ /dev/null
@@ -1,6 +0,0 @@
-Put any scripts useful for human analysis of MT output here.
-
-[EVH]
-sentence-by-sentence.pl: show comparison of sentences in reference translation(s)/system output/(truth) in colorful format
--- shows all sentences given, with non-matching words in the system output marked, BLEU scores given by sentence, and matching n-grams shown in a table
--- requires all input files be utf8-encoded (you can convert a file with `cat FILE | perl -n -e 'binmode(STDOUT, ":utf8"); print;' > FILE.utf8`)
diff --git a/scripts/analysis/nontranslated_words.pl b/scripts/analysis/nontranslated_words.pl
deleted file mode 100755
index 687eaae20..000000000
--- a/scripts/analysis/nontranslated_words.pl
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/perl
-#
-
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $ignore_numbers = 0;
-my $ignore_punct = 0;
-my $usage = 0;
-my $top = 10;
-
-GetOptions(
- "help" => \$usage,
- "top=i" => \$top,
- "ignore-numbers" => \$ignore_numbers,
- "ignore-punct" => \$ignore_punct,
-) or exit 1;
-my $src = shift;
-my $tgt = shift;
-
-if ($usage || !defined $src || !defined $tgt) {
- print STDERR "nontranslated_words.pl srcfile hypothesisfile
-...counts the number of words that are equal in src and hyp. These are
-typically unknown words.
-Options:
- --top=N ... list N top copied tokens
- --ignore-numbers ... numbers usually do not get translated, but do
- not count them (it is not an error)
- --ignore-punct ... same for punct, do not include it in the count
-";
- exit 1;
-}
-
-binmode(STDOUT, ":utf8");
-binmode(STDERR, ":utf8");
-
-open SRC, $src or die "Can't read $src";
-open TGT, $tgt or die "Can't read $tgt";
-binmode(SRC, ":utf8");
-binmode(TGT, ":utf8");
-
-my $nr=0;
-my $outtoks = 0;
-my $intoks = 0;
-my $copiedtoks = 0;
-my %copiedtok;
-while (<SRC>) {
- $nr++;
- chomp;
- s/^\s+|\s+$//g;
- my @src = split /\s+/;
- my %src = map {($_,1)} @src;
- $intoks += scalar @src;
- my $t = <TGT>;
- die "$tgt too short!" if !defined $t;
- $t =~ s/^\s+|\s+$//g;
- foreach my $outtok (split /\s+/, $t) {
- $outtoks++;
- next if !defined $src{$outtok}; # this word did not appear in input, we generated it
- next if $ignore_numbers && $outtok =~ /^-?[0-9]*([.,][0-9]+)?$/;
- next if $ignore_punct && $outtok =~ /^[[:punct:]]+$/;
- $copiedtoks++;
- $copiedtok{$outtok}++;
- }
-}
-my $t = <TGT>;
-die "$tgt too long!" if defined $t;
-close SRC;
-close TGT;
-
-print "Sentences:\t$nr
-Source tokens:\t$intoks
-Output tokens:\t$outtoks
-Output tokens appearing also in input sent:\t$copiedtoks\t"
- .sprintf("%.2f %%", $copiedtoks/$outtoks*100)
- ."\t".($ignore_punct?"ignoring":"including")." punctuation"
- ."\t".($ignore_numbers?"ignoring":"including")." numbers"
- ."\n";
-
-if ($top) {
- my $cnt = 0;
- print "Top $top copied tokens:\n";
- foreach my $t (sort {$copiedtok{$b}<=>$copiedtok{$a} || $a cmp $b} keys %copiedtok) {
- print "$copiedtok{$t}\t$t\n";
-		$cnt++;
-		last if $cnt >= $top;
- }
-}
diff --git a/scripts/analysis/perllib/Error.pm b/scripts/analysis/perllib/Error.pm
deleted file mode 100644
index cc9edbb69..000000000
--- a/scripts/analysis/perllib/Error.pm
+++ /dev/null
@@ -1,744 +0,0 @@
-# Error.pm
-#
-# Copyright (c) 1997-8 Graham Barr <gbarr@ti.com>. All rights reserved.
-# This program is free software; you can redistribute it and/or
-# modify it under the same terms as Perl itself.
-#
-# Based on my original Error.pm, and Exceptions.pm by Peter Seibel
-# <peter@weblogic.com> and adapted by Jesse Glick <jglick@sig.bsh.com>.
-#
-# but modified ***significantly***
-
-package Error;
-
-use strict;
-use vars qw($VERSION);
-use 5.004;
-
-$VERSION = "0.15";
-
-use overload (
- '""' => 'stringify',
- '0+' => 'value',
- 'bool' => sub { return 1; },
- 'fallback' => 1
-);
-
-$Error::Depth = 0; # Depth to pass to caller()
-$Error::Debug = 0; # Generate verbose stack traces
-@Error::STACK = (); # Clause stack for try
-$Error::THROWN = undef; # last error thrown, a workaround until die $ref works
-
-my $LAST; # Last error created
-my %ERROR; # Last error associated with package
-
-# Exported subs are defined in Error::subs
-
-sub import {
- shift;
- local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;
- Error::subs->import(@_);
-}
-
-# I really want to use last for the name of this method, but it is a keyword
-# which prevents the syntax last Error
-
-sub prior {
- shift; # ignore
-
- return $LAST unless @_;
-
- my $pkg = shift;
- return exists $ERROR{$pkg} ? $ERROR{$pkg} : undef
- unless ref($pkg);
-
- my $obj = $pkg;
- my $err = undef;
- if($obj->isa('HASH')) {
- $err = $obj->{'__Error__'}
- if exists $obj->{'__Error__'};
- }
- elsif($obj->isa('GLOB')) {
- $err = ${*$obj}{'__Error__'}
- if exists ${*$obj}{'__Error__'};
- }
-
- $err;
-}
-
-# Return as much information as possible about where the error
-# happened. The -stacktrace element only exists if $Error::DEBUG
-# was set when the error was created
-
-sub stacktrace {
- my $self = shift;
-
- return $self->{'-stacktrace'}
- if exists $self->{'-stacktrace'};
-
- my $text = exists $self->{'-text'} ? $self->{'-text'} : "Died";
-
- $text .= sprintf(" at %s line %d.\n", $self->file, $self->line)
- unless($text =~ /\n$/s);
-
- $text;
-}
-
-# Allow error propagation, ie
-#
-# $ber->encode(...) or
-# return Error->prior($ber)->associate($ldap);
-
-sub associate {
- my $err = shift;
- my $obj = shift;
-
- return unless ref($obj);
-
- if($obj->isa('HASH')) {
- $obj->{'__Error__'} = $err;
- }
- elsif($obj->isa('GLOB')) {
- ${*$obj}{'__Error__'} = $err;
- }
- $obj = ref($obj);
- $ERROR{ ref($obj) } = $err;
-
- return;
-}
-
-sub new {
- my $self = shift;
- my($pkg,$file,$line) = caller($Error::Depth);
-
- my $err = bless {
- '-package' => $pkg,
- '-file' => $file,
- '-line' => $line,
- @_
- }, $self;
-
- $err->associate($err->{'-object'})
- if(exists $err->{'-object'});
-
- # To always create a stacktrace would be very inefficient, so
- # we only do it if $Error::Debug is set
-
- if($Error::Debug) {
- require Carp;
- local $Carp::CarpLevel = $Error::Depth;
- my $text = defined($err->{'-text'}) ? $err->{'-text'} : "Error";
- my $trace = Carp::longmess($text);
- # Remove try calls from the trace
- $trace =~ s/(\n\s+\S+__ANON__[^\n]+)?\n\s+eval[^\n]+\n\s+Error::subs::try[^\n]+(?=\n)//sog;
- $trace =~ s/(\n\s+\S+__ANON__[^\n]+)?\n\s+eval[^\n]+\n\s+Error::subs::run_clauses[^\n]+\n\s+Error::subs::try[^\n]+(?=\n)//sog;
- $err->{'-stacktrace'} = $trace
- }
-
- $@ = $LAST = $ERROR{$pkg} = $err;
-}
-
-# Throw an error. this contains some very gory code.
-
-sub throw {
- my $self = shift;
- local $Error::Depth = $Error::Depth + 1;
-
- # if we are not rethrow-ing then create the object to throw
- $self = $self->new(@_) unless ref($self);
-
- die $Error::THROWN = $self;
-}
-
-# syntactic sugar for
-#
-# die with Error( ... );
-
-sub with {
- my $self = shift;
- local $Error::Depth = $Error::Depth + 1;
-
- $self->new(@_);
-}
-
-# syntactic sugar for
-#
-# record Error( ... ) and return;
-
-sub record {
- my $self = shift;
- local $Error::Depth = $Error::Depth + 1;
-
- $self->new(@_);
-}
-
-# catch clause for
-#
-# try { ... } catch CLASS with { ... }
-
-sub catch {
- my $pkg = shift;
- my $code = shift;
- my $clauses = shift || {};
- my $catch = $clauses->{'catch'} ||= [];
-
- unshift @$catch, $pkg, $code;
-
- $clauses;
-}
-
-# Object query methods
-
-sub object {
- my $self = shift;
- exists $self->{'-object'} ? $self->{'-object'} : undef;
-}
-
-sub file {
- my $self = shift;
- exists $self->{'-file'} ? $self->{'-file'} : undef;
-}
-
-sub line {
- my $self = shift;
- exists $self->{'-line'} ? $self->{'-line'} : undef;
-}
-
-sub text {
- my $self = shift;
- exists $self->{'-text'} ? $self->{'-text'} : undef;
-}
-
-# overload methods
-
-sub stringify {
- my $self = shift;
- defined $self->{'-text'} ? $self->{'-text'} : "Died";
-}
-
-sub value {
- my $self = shift;
- exists $self->{'-value'} ? $self->{'-value'} : undef;
-}
-
-package Error::Simple;
-
-@Error::Simple::ISA = qw(Error);
-
-sub new {
- my $self = shift;
- my $text = "" . shift;
- my $value = shift;
- my(@args) = ();
-
- local $Error::Depth = $Error::Depth + 1;
-
- @args = ( -file => $1, -line => $2)
- if($text =~ s/ at (\S+) line (\d+)(\.\n)?$//s);
-
- push(@args, '-value', 0 + $value)
- if defined($value);
-
- $self->SUPER::new(-text => $text, @args);
-}
-
-sub stringify {
- my $self = shift;
- my $text = $self->SUPER::stringify;
- $text .= sprintf(" at %s line %d.\n", $self->file, $self->line)
- unless($text =~ /\n$/s);
- $text;
-}
-
-##########################################################################
-##########################################################################
-
-# Inspired by code from Jesse Glick <jglick@sig.bsh.com> and
-# Peter Seibel <peter@weblogic.com>
-
-package Error::subs;
-
-use Exporter ();
-use vars qw(@EXPORT_OK @ISA %EXPORT_TAGS);
-
-@EXPORT_OK = qw(try with finally except otherwise);
-%EXPORT_TAGS = (try => \@EXPORT_OK);
-
-@ISA = qw(Exporter);
-
-sub run_clauses ($$$\@) {
- my($clauses,$err,$wantarray,$result) = @_;
- my $code = undef;
-
- $err = new Error::Simple($err) unless ref($err);
-
- CATCH: {
-
- # catch
- my $catch;
- if(defined($catch = $clauses->{'catch'})) {
- my $i = 0;
-
- CATCHLOOP:
- for( ; $i < @$catch ; $i += 2) {
- my $pkg = $catch->[$i];
- unless(defined $pkg) {
- #except
- splice(@$catch,$i,2,$catch->[$i+1]->());
- $i -= 2;
- next CATCHLOOP;
- }
- elsif($err->isa($pkg)) {
- $code = $catch->[$i+1];
- while(1) {
- my $more = 0;
- local($Error::THROWN);
- my $ok = eval {
- if($wantarray) {
- @{$result} = $code->($err,\$more);
- }
- elsif(defined($wantarray)) {
- @{$result} = ();
- $result->[0] = $code->($err,\$more);
- }
- else {
- $code->($err,\$more);
- }
- 1;
- };
- if( $ok ) {
- next CATCHLOOP if $more;
- undef $err;
- }
- else {
- $err = defined($Error::THROWN)
- ? $Error::THROWN : $@;
- $err = new Error::Simple($err)
- unless ref($err);
- }
- last CATCH;
- };
- }
- }
- }
-
- # otherwise
- my $owise;
- if(defined($owise = $clauses->{'otherwise'})) {
- my $code = $clauses->{'otherwise'};
- my $more = 0;
- my $ok = eval {
- if($wantarray) {
- @{$result} = $code->($err,\$more);
- }
- elsif(defined($wantarray)) {
- @{$result} = ();
- $result->[0] = $code->($err,\$more);
- }
- else {
- $code->($err,\$more);
- }
- 1;
- };
- if( $ok ) {
- undef $err;
- }
- else {
- $err = defined($Error::THROWN)
- ? $Error::THROWN : $@;
- $err = new Error::Simple($err)
- unless ref($err);
- }
- }
- }
- $err;
-}
-
-sub try (&;$) {
- my $try = shift;
- my $clauses = @_ ? shift : {};
- my $ok = 0;
- my $err = undef;
- my @result = ();
-
- unshift @Error::STACK, $clauses;
-
- my $wantarray = wantarray();
-
- do {
- local $Error::THROWN = undef;
-
- $ok = eval {
- if($wantarray) {
- @result = $try->();
- }
- elsif(defined $wantarray) {
- $result[0] = $try->();
- }
- else {
- $try->();
- }
- 1;
- };
-
- $err = defined($Error::THROWN) ? $Error::THROWN : $@
- unless $ok;
- };
-
- shift @Error::STACK;
-
- $err = run_clauses($clauses,$err,wantarray,@result)
- unless($ok);
-
- $clauses->{'finally'}->()
- if(defined($clauses->{'finally'}));
-
- throw $err if defined($err);
-
- wantarray ? @result : $result[0];
-}
-
-# Each clause adds a sub to the list of clauses. The finally clause is
-# always the last, and the otherwise clause is always added just before
-# the finally clause.
-#
-# All clauses, except the finally clause, add a sub which takes one argument;
-# this argument will be the error being thrown. The sub will return a code ref
-# if that clause can handle that error, otherwise undef is returned.
-#
-# The otherwise clause adds a sub which unconditionally returns the user's
-# code reference; this is why it is forced to be last.
-#
-# The catch clause is defined in Error.pm, as the syntax causes it to
-# be called as a method
-
-sub with (&;$) {
- @_
-}
-
-sub finally (&) {
- my $code = shift;
- my $clauses = { 'finally' => $code };
- $clauses;
-}
-
-# The except clause is a block which returns a hashref or a list of
-# key-value pairs, where the keys are the classes and the values are subs.
-
-sub except (&;$) {
- my $code = shift;
- my $clauses = shift || {};
- my $catch = $clauses->{'catch'} ||= [];
-
- my $sub = sub {
- my $ref;
- my(@array) = $code->($_[0]);
- if(@array == 1 && ref($array[0])) {
- $ref = $array[0];
- $ref = [ %$ref ]
- if(UNIVERSAL::isa($ref,'HASH'));
- }
- else {
- $ref = \@array;
- }
- @$ref
- };
-
- unshift @{$catch}, undef, $sub;
-
- $clauses;
-}
-
-sub otherwise (&;$) {
- my $code = shift;
- my $clauses = shift || {};
-
- if(exists $clauses->{'otherwise'}) {
- require Carp;
- Carp::croak("Multiple otherwise clauses");
- }
-
- $clauses->{'otherwise'} = $code;
-
- $clauses;
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Error - Error/exception handling in an OO-ish way
-
-=head1 SYNOPSIS
-
- use Error qw(:try);
-
- throw Error::Simple( "A simple error");
-
- sub xyz {
- ...
- record Error::Simple("A simple error")
- and return;
- }
-
- unlink($file) or throw Error::Simple("$file: $!",$!);
-
- try {
- do_some_stuff();
- die "error!" if $condition;
- throw Error::Simple -text => "Oops!" if $other_condition;
- }
- catch Error::IO with {
- my $E = shift;
- print STDERR "File ", $E->{'-file'}, " had a problem\n";
- }
- except {
- my $E = shift;
- my $general_handler=sub {send_message $E->{-description}};
- return {
- UserException1 => $general_handler,
- UserException2 => $general_handler
- };
- }
- otherwise {
- print STDERR "Well I don't know what to say\n";
- }
- finally {
- close_the_garage_door_already(); # Should be reliable
- }; # Don't forget the trailing ; or you might be surprised
-
-=head1 DESCRIPTION
-
-The C<Error> package provides two interfaces. Firstly C<Error> provides
-a procedural interface to exception handling. Secondly C<Error> is a
-base class for errors/exceptions that can either be thrown, for
-subsequent catch, or can simply be recorded.
-
-Errors in the class C<Error> should not be thrown directly, but the
-user should throw errors from a sub-class of C<Error>.
-
-=head1 PROCEDURAL INTERFACE
-
-C<Error> exports subroutines to perform exception handling. These will
-be exported if the C<:try> tag is used in the C<use> line.
-
-=over 4
-
-=item try BLOCK CLAUSES
-
-C<try> is the main subroutine called by the user. All other subroutines
-exported are clauses to the try subroutine.
-
-The BLOCK will be evaluated and, if no error is thrown, try will return
-the result of the block.
-
-C<CLAUSES> are the subroutines below, which describe what to do in the
-event of an error being thrown within BLOCK.
-
-=item catch CLASS with BLOCK
-
-This clause will cause all errors that satisfy C<$err-E<gt>isa(CLASS)>
-to be caught and handled by evaluating C<BLOCK>.
-
-C<BLOCK> will be passed two arguments. The first will be the error
-being thrown. The second is a reference to a scalar variable. If this
-variable is set by the catch block then, on return from the catch
-block, try will continue processing as if the catch block was never
-found.
-
-To propagate the error the catch block may call C<$err-E<gt>throw>
-
-If the scalar referenced by the second argument is not set, and the
-error is not thrown, then the current try block will return with the
-result from the catch block.
-
-=item except BLOCK
-
-When C<try> is looking for a handler, if an except clause is found
-C<BLOCK> is evaluated. The return value from this block should be a
-HASHREF or a list of key-value pairs, where the keys are class names
-and the values are CODE references for the handler of errors of that
-type.
-
-=item otherwise BLOCK
-
-Catch any error by executing the code in C<BLOCK>
-
-When evaluated C<BLOCK> will be passed one argument, which will be the
-error being processed.
-
-Only one otherwise block may be specified per try block
-
-=item finally BLOCK
-
-Execute the code in C<BLOCK> either after the code in the try block has
-successfully completed, or if the try block throws an error then
-C<BLOCK> will be executed after the handler has completed.
-
-If the handler throws an error then the error will be caught, the
-finally block will be executed and the error will be re-thrown.
-
-Only one finally block may be specified per try block
-
-=back
-
-=head1 CLASS INTERFACE
-
-=head2 CONSTRUCTORS
-
-The C<Error> object is implemented as a HASH. This HASH is initialized
-with the arguments that are passed to its constructor. The elements
-that are used by, or are retrievable by, the C<Error> class are listed
-below; other classes may add to these.
-
- -file
- -line
- -text
- -value
- -object
-
-If C<-file> or C<-line> are not specified in the constructor arguments
-then these will be initialized with the file name and line number where
-the constructor was called from.
-
-If the error is associated with an object then the object should be
-passed as the C<-object> argument. This will allow the C<Error> package
-to associate the error with the object.
-
-The C<Error> package remembers the last error created, and also the
-last error associated with a package. This could either be the last
-error created by a sub in that package, or the last error which passed
-an object blessed into that package as the C<-object> argument.
-
-=over 4
-
-=item throw ( [ ARGS ] )
-
-Create a new C<Error> object and throw an error, which will be caught
-by a surrounding C<try> block, if there is one. Otherwise it will cause
-the program to exit.
-
-C<throw> may also be called on an existing error to re-throw it.
-
-=item with ( [ ARGS ] )
-
-Create a new C<Error> object and return it. This is defined for
-syntactic sugar, eg
-
- die with Some::Error ( ... );
-
-=item record ( [ ARGS ] )
-
-Create a new C<Error> object and return it. This is defined for
-syntactic sugar, eg
-
- record Some::Error ( ... )
- and return;
-
-=back
-
-=head2 STATIC METHODS
-
-=over 4
-
-=item prior ( [ PACKAGE ] )
-
-Return the last error created, or the last error associated with
-C<PACKAGE>
-
-=back
-
-=head2 OBJECT METHODS
-
-=over 4
-
-=item stacktrace
-
-If the variable C<$Error::Debug> was non-zero when the error was
-created, then C<stacktrace> returns a string created by calling
-C<Carp::longmess>. If the variable was zero the C<stacktrace> returns
-the text of the error appended with the filename and line number of
-where the error was created, providing the text does not end with a
-newline.
-
-=item object
-
-The object this error was associated with
-
-=item file
-
-The file where the constructor of this error was called from
-
-=item line
-
-The line where the constructor of this error was called from
-
-=item text
-
-The text of the error
-
-=back
-
-=head2 OVERLOAD METHODS
-
-=over 4
-
-=item stringify
-
-A method that converts the object into a string. This method may simply
-return the same as the C<text> method, or it may append more
-information. For example the file name and line number.
-
-By default this method returns the C<-text> argument that was passed to
-the constructor, or the string C<"Died"> if none was given.
-
-=item value
-
-A method that will return a value that can be associated with the
-error. For example if an error was created due to the failure of a
-system call, then this may return the numeric value of C<$!> at the
-time.
-
-By default this method returns the C<-value> argument that was passed
-to the constructor.
-
-=back
-
-=head1 PRE-DEFINED ERROR CLASSES
-
-=over 4
-
-=item Error::Simple
-
-This class can be used to hold simple error strings and values. Its
-constructor takes two arguments. The first is a text value, the second
-is a numeric value. These values are what will be returned by the
-overload methods.
-
-If the text value ends with C<at file line 1> as $@ strings do, then
-this information will be used to set the C<-file> and C<-line> arguments
-of the error object.
-
-This class is used internally if an eval'd block dies with an error
-that is a plain string.
-
-=back
-
-=head1 KNOWN BUGS
-
-None, but that does not mean there are not any.
-
-=head1 AUTHORS
-
-Graham Barr <gbarr@pobox.com>
-
-The code that inspired me to write this was originally written by
-Peter Seibel <peter@weblogic.com> and adapted by Jesse Glick
-<jglick@sig.bsh.com>.
-
-=head1 MAINTAINER
-
-Arun Kumar U <u_arunkumar@yahoo.com>
-
-=cut
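The POD above documents both the procedural try/catch/except/otherwise/finally interface and the Error/Error::Simple constructors. A short, self-contained sketch of the common pattern follows; it assumes this bundled copy of Error.pm is reachable on @INC, and the risky() routine with its message and value is invented purely for illustration:

    #!/usr/bin/perl
    # Illustrative use of the try/catch interface documented above.
    use strict;
    use warnings;
    use lib 'scripts/analysis/perllib';   # assumed path to the bundled Error.pm
    use Error qw(:try);

    sub risky {
        my $n = shift;
        # throw an Error::Simple carrying a text and a numeric value
        throw Error::Simple("negative input: $n", $n) if $n < 0;
        return sqrt($n);
    }

    try {
        print 'sqrt: ', risky(-4), "\n";
    }
    catch Error::Simple with {
        my $err = shift;
        print 'caught: ', $err->text, ' (value ', $err->value, ")\n";
    }
    finally {
        print "done\n";
    };   # the trailing semicolon matters, as the SYNOPSIS notes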
diff --git a/scripts/analysis/sentence-by-sentence.pl b/scripts/analysis/sentence-by-sentence.pl
deleted file mode 100755
index f265a07b8..000000000
--- a/scripts/analysis/sentence-by-sentence.pl
+++ /dev/null
@@ -1,447 +0,0 @@
-#!/usr/bin/perl -w
-
-#sentence-by-sentence: take in a system output, with any number of factors, and a reference translation, also maybe with factors, and show each sentence and its errors
-#usage: sentence-by-sentence SYSOUT [REFERENCE]+ > sentences.html
-
-use strict;
-use Getopt::Long;
-
-my $sourcefile = undef;
-GetOptions(
- "source=s" => \$sourcefile,
-) or exit(1);
-
-my ($sysoutfile, @truthfiles) = @ARGV;
-
-if (!defined $sysoutfile || scalar(@truthfiles) == 0) {
- print STDERR "
-usage: $0 system_output reference(s...) > sentence-by-sentence.html
-Options:
- --source STRING ... foreign original
-
-N-grams are colored by the number of supporting references:
-  red for fewest, green for most, intermediate shades otherwise.
-";
- exit(1);
-}
-
-my @TRUTHS = () x scalar(@truthfiles);
-for(my $i = 0; $i < scalar(@truthfiles); $i++)
-{
- open($TRUTHS[$i], "<$truthfiles[$i]") or die "couldn't open '$truthfiles[$i]' for read: $!\n";
- binmode($TRUTHS[$i], ":utf8");
-}
-open(SYSOUT, "<$sysoutfile") or die "couldn't open '$sysoutfile' for read: $!\n";
-binmode(SYSOUT, ":utf8");
-binmode(STDOUT, ":utf8");
-if (defined $sourcefile)
-{
- open(SOURCE, "<$sourcefile") or die "couldn't open '$sourcefile' for read: $!\n";
- binmode(SOURCE, ":utf8");
-}
-my @bleuScores;
-my @htmlSentences;
-my @htmlColors = ('#99ff99', '#aaaaff', '#ffff99', '#ff9933', '#ff9999'); #color sentences by rank (split in n tiers)
-my $ngramSingleRefColor = '#aaffaa';
-my @ngramMultirefColors = ('ff9999', 'ff9933', 'ffff99', 'a0a0ff', '99ff99'); #arbitrary-length list; first entry is used for worst n-grams
-my $i = 0;
-while(my $sLine = <SYSOUT>)
-{
- my @sFactors = @{extractFactorArrays($sLine)};
- my @eLines = () x scalar(@truthfiles);
- my @eFactors;
- for(my $j = 0; $j < scalar(@truthfiles); $j++)
- {
- my $fh = $TRUTHS[$j];
- $eLines[$j] = <$fh>;
- push @eFactors, extractFactorArrays($eLines[$j], "$truthfiles[$j] shorter than $sysoutfile");
- }
- my $sourceFactors;
- if (defined $sourcefile)
- {
- my $sourceLine = <SOURCE>;
- $sourceFactors = extractFactorArrays($sourceLine, "$sourcefile shorter than $sysoutfile");
- }
-
- my $bleuData = getBLEUSentenceDetails(\@sFactors, \@eFactors, 0);
- push @bleuScores, [$i, $bleuData->[0], 0]; #the last number will be the rank
- my $pwerData = getPWERSentenceDetails(\@sFactors, \@eFactors, 0);
- my $html = "<div class=\"sentence\" style=\"background-color: %%%%\" id=\"sentence$i\">"; #the %%%% and other tokens like it are flags to be replaced
- $html .= "<div class=\"bleu_report\"><b>Sentence $i)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BLEU:</b> " . sprintf("%.4lg", $bleuData->[0]->[0]) . " (" . join('/', map {sprintf("%.4lg", $_)} @{$bleuData->[0]}[1 .. 4]) . ")</div><table>\n";
- if(defined $sourcefile)
- {
- $html .= "<tr><td class=\"sent_title\">Source</td><td class=\"source_sentence\" id=\"source$i\">" . getFactoredSentenceHTML($sourceFactors) . "</td></tr>\n";
- }
- for(my $j = 0; $j < scalar(@truthfiles); $j++)
- {
- $html .= "<tr><td class=\"sent_title\">Ref $j</td><td class=\"truth_sentence\" id=\"truth${i}_$j\">" . getFactoredSentenceHTML($eFactors[$j]) . "</td></tr>\n";
- }
- my $j = 0;
- $html .= "<tr><td class=\"sent_title\">Output</td><td class=\"sysout_sentence\" id=\"sysout$i\">" . getFactoredSentenceHTML(\@sFactors, $pwerData) . "</td></tr>\n";
- $j = 0;
- $html .= "<tr><td class=\"sent_title\">N-grams</td><td class=\"sysout_ngrams\" id=\"ngrams$i\">" . getAllNgramsHTML(\@sFactors, $bleuData->[1], scalar(@truthfiles)) . "</td></tr>\n";
- $html .= "</table></div>\n";
- push @htmlSentences, $html;
- $i++;
-}
-close(SYSOUT);
-foreach my $truthfh (@TRUTHS) {close($truthfh);}
-
-rankSentencesByBLEU(\@bleuScores);
-my $stylesheet = <<EOHTML;
-<style type="text/css">
-.legend {background: #fff; border: 1px solid #000; padding: 2px; margin-bottom: 10px; margin-right: 15px}
-.legend_title {font-weight: bold; font-size: medium; text-decoration: underline}
-div.sentence {background: #ffffee; border: 1px solid #000088; padding: 0px 8px 0px 8px} /* entire composition for a given sentence */
-div.sentence td {margin: 8px 0px 8px 0px}
-div.bleu_report {margin-bottom: 5px}
-td.sent_title {font-weight: bold; font-size: medium; margin-bottom: 12px}
-.source_sentence {background: #ffcccc; border: 1px solid #bbb}
-.truth_sentence {background: #ccffcc; border: 1px solid #bbb}
-.sysout_sentence {background: #ccccff; border: 1px solid #bbb}
-table.sentence_table {border: none}
-.sysout_ngrams {background: #fff; border: 1px solid #bbb}
-table.ngram_table {}
-td.ngram_cell {padding: 1px}
-</style>
-EOHTML
-print "<html><head>\n";
-print "<meta http-equiv=\"Content-type: text/html; charset=utf-8\">\n";
-print "<title>$sysoutfile vs. [" . join(' ', @truthfiles) . "]: Sentence-by-sentence Comparison</title>$stylesheet</head><body>\n";
-
-#javascript to sort by BLEU, by order in corpus, ...
-my %rank2index = map {$bleuScores[$_]->[2] => $_} (0 .. scalar(@htmlSentences) - 1);
-print "<script type=\"text/javascript\">
-function sortByBLEU()
-{
- var body = document.getElementById('all_sentences'); var row;\n";
-foreach my $rank (sort {$a <=> $b} keys %rank2index)
-{
- print "\trow = document.getElementById('everything" . $rank2index{$rank} . "');\n";
- print "\tbody.removeChild(row); body.appendChild(row);\n";
-}
-print "}
-function sortByCorpusOrder()
-{
- var body = document.getElementById('all_sentences'); var row;\n";
-for(my $j = 0; $j < scalar(@htmlSentences); $j++)
-{
- print "\trow = document.getElementById('everything$j');\n";
- print "\tbody.removeChild(row); body.appendChild(row);\n";
-}
-print "}
-</script>\n";
-
-#legend for background colors
-my @minBLEU = (1e9) x scalar(@htmlColors);
-my @maxBLEU = (-1e9) x scalar(@htmlColors);
-for(my $k = 0; $k < scalar(@htmlSentences); $k++)
-{
- my $tier = int($bleuScores[$k]->[2] / (scalar(@htmlSentences) / scalar(@htmlColors)));
- if($bleuScores[$k]->[1]->[0] < $minBLEU[$tier]) {$minBLEU[$tier] = $bleuScores[$k]->[1]->[0];}
- elsif($bleuScores[$k]->[1]->[0] > $maxBLEU[$tier]) {$maxBLEU[$tier] = $bleuScores[$k]->[1]->[0];}
-}
-print "<table border=0><tr><td><div class=\"legend\"><span class=\"legend_title\">Sentence Background Colors => BLEU Ranges</span><table border=0>";
-for(my $k = 0; $k < scalar(@htmlColors); $k++)
-{
- print "<tr><td style=\"width: 15px; height: 15px; background: " . $htmlColors[$k] . "\"></td><td align=left style=\"padding-left: 12px\">"
- . sprintf("%.4lg", $minBLEU[$k]) . " - " . sprintf("%.4lg", $maxBLEU[$k]) . "</td>";
-}
-print "</table></div></td>\n";
-print "<td><div class=\"legend\"><span class=\"legend_title\">N-gram Colors => Number of Matching Reference Translations</span><table border=0>";
-for(my $k = 1; $k <= scalar(@truthfiles); $k++)
-{
- print "<tr><td style=\"width: 15px; height: 15px; background: " . getNgramColorHTML($k, scalar(@truthfiles)) . "\"></td><td align=left style=\"padding-left: 12px\">$k</td>";
-}
-print "</table></div></td></tr></table><div style=\"font-weight: bold; margin-bottom: 15px\">
-PWER errors are marked in red on output sentence displays.</div>
-<div style=\"margin-bottom: 8px\">Sort by <a href=\"javascript:sortByBLEU();\">BLEU score</a> | <a href=\"javascript:sortByCorpusOrder();\">corpus order</a> (default)</div>\n";
-
-#sentence boxes
-print "<div id=\"all_sentences\">";
-my $j = 0;
-foreach my $sentenceHTML (@htmlSentences)
-{
- print "<div id=\"everything$j\" style=\"margin: 0px; padding: 0px\">";
- print "<hr width=98%>";
- my $bgcolor = getSentenceBGColorHTML($bleuScores[$j], $i); #i is now # of sentences
- $sentenceHTML =~ s/%%%%/$bgcolor/;
- print "$sentenceHTML</div>\n";
- $j++;
-}
-print "</div></body></html>";
-
-##################### utils #####################
-
-#arguments: a, b (scalars)
-sub min
-{
- my ($a, $b) = @_;
- return ($a < $b) ? $a : $b;
-}
-#arguments: a, b (scalars)
-sub max
-{
- my ($a, $b) = @_;
- return ($a > $b) ? $a : $b;
-}
-#arguments: a list of elements
-#return undef for an empty list, the max element otherwise
-sub maxN
-{
- if(scalar @_ == 0) {return undef;}
- my $val = shift @_;
- foreach my $e (@_) {if($e > $val) {$val = $e;}}
- return $val;
-}
-#arguments: x
-sub my_log
-{
- return -9999999999 unless $_[0];
- return log($_[0]);
-}
-#arguments: x
-sub round
-{
- my $x = shift;
- return ($x - int($x) < .5) ? int($x) : int($x) + 1;
-}
-
-###############################################################################################################################################################
-
-#arguments: line read from corpus file, (optionally) string to die with if line isn't defined (default die-msg is empty)
-#return: sentence (arrayref of arrayrefs of factor strings) taken from line
-sub extractFactorArrays
-{
- my ($line, $msg) = (shift, '');
- $msg = shift if scalar(@_);
- die $msg if !defined $line;
- chomp $line;
- $line =~ s/^\s*|\s*$//g; #added by Ondrej to handle moses-mert-parallel output
- my @words = split(/\s+/, $line);
- my @factors = map {my @f = split(/\|/, $_); \@f;} @words;
- return \@factors;
-}
-
-#can handle multiple reference translations; assume at least one
-#arguments: sysout sentence (arrayref of arrayrefs of factor strings), truth sentences (arrayref of same), factor index to use
-#return: arrayref of [arrayref of [overall BLEU score, n-gram precisions], arrayref of matching n-gram [start index, length, arrayref of indices of matching references]]
-sub getBLEUSentenceDetails
-{
- my $maxNgramOrder = 4;
- my ($refSysOutput, $refTruths, $factorIndex) = @_;
- my $length_translation = scalar(@$refSysOutput); #length of sysout sentence
- my @length_references = map {scalar(@$_)} @$refTruths;
- my $closestTruthLength = (sort(map {abs($_ - $length_translation)} @length_references))[0];
- my @correct = (0) x $maxNgramOrder; #n-gram counts
- my @total = (0) x $maxNgramOrder; #n-gram counts
- my $returnData = [[], []];
- my %REF_GRAM; #hash from n-gram to arrayref with # of times found in each reference
- my $ngramMatches = []; #arrayref of n-gram [start index, length]
- for(my $j = 0; $j < scalar(@$refTruths); $j++)
- {
- for(my $i = 0; $i < $length_references[$j]; $i++)
- {
- my $gram = '';
- for(my $k = 0; $k < min($i + 1, $maxNgramOrder); $k++) #run over n-gram orders
- {
- $gram = $refTruths->[$j]->[$i - $k]->[$factorIndex] . " " . $gram;
- #increment the count for the given n-gram and given reference number
- if(!exists $REF_GRAM{$gram})
- {
- my @tmp = (0) x scalar @$refTruths;
- $tmp[$j] = 1;
- $REF_GRAM{$gram} = \@tmp;
- }
- else
- {
- $REF_GRAM{$gram}->[$j]++;
- }
- }
- }
- }
- for(my $i = 0; $i < $length_translation; $i++)
- {
- my $gram = '';
- for(my $k = 0; $k < min($i + 1, $maxNgramOrder); $k++) #run over n-gram orders
- {
- $gram = $refSysOutput->[$i - $k]->[$factorIndex] . " " . $gram;
- if(exists $REF_GRAM{$gram}) #this n-gram was found in at least one reference
- {
- $correct[$k]++;
- my @indices = ();
- for(my $m = 0; $m < scalar(@{$REF_GRAM{$gram}}); $m++)
- {
- if($REF_GRAM{$gram}->[$m] > 0)
- {
- push @indices, $m;
- $REF_GRAM{$gram}->[$m]--;
- }
- }
- push @$ngramMatches, [$i - $k, $k + 1, \@indices];
- }
- }
- }
- my $brevity = ($length_translation > $closestTruthLength || $length_translation == 0) ? 1 : exp(1 - $closestTruthLength / $length_translation);
- my @pct;
- my ($logsum, $logcount) = (0, 0);
- for(my $i = 0; $i < $maxNgramOrder; $i++)
- {
- $total[$i] = max(1, $length_translation - $i);
- push @pct, ($total[$i] == 0) ? -1 : $correct[$i] / $total[$i];
- if($total[$i] > 0)
- {
- $logsum += my_log($pct[$i]);
- $logcount++;
- }
- }
- my $bleu = $brevity * exp($logsum / $logcount);
- $returnData->[0] = [$bleu, @pct];
- $returnData->[1] = $ngramMatches;
- return $returnData;
-}
-
-#can handle multiple sentence translations; assume at least one
-#arguments: sysout sentence (arrayref of arrayrefs of factor strings), truth sentences (arrayref of same), factor index to use
-#return: hashref of sysout word index => whether word matches
-sub getPWERSentenceDetails
-{
- my ($refSysOutput, $refTruths, $factorIndex) = @_;
- my $matches = {};
- my %truthWords; #hash from word to arrayref with number of times seen in each reference (but later holds only the max)
- for(my $i = 0; $i < scalar(@$refTruths); $i++)
- {
- foreach my $eWord (@{$refTruths->[$i]})
- {
- my $factor = $eWord->[$factorIndex];
- if(exists $truthWords{$factor}) {$truthWords{$factor}->[$i]++;}
- else {my @tmp = (0) x scalar(@$refTruths); $tmp[$i] = 1; $truthWords{$factor} = \@tmp;}
- }
- }
- %truthWords = map {$_ => maxN(@{$truthWords{$_}})} (keys %truthWords); #save only the max times each word is seen in a reference
- for(my $j = 0; $j < scalar(@$refSysOutput); $j++)
- {
- if(exists $truthWords{$refSysOutput->[$j]->[$factorIndex]} && $truthWords{$refSysOutput->[$j]->[$factorIndex]} > 0)
- {
- $truthWords{$refSysOutput->[$j]->[$factorIndex]}--;
- $matches->{$j} = 1;
- }
- else
- {
- $matches->{$j} = 0;
- }
- }
- return $matches;
-}
-
-#assign ranks to sentences by BLEU score
-#arguments: arrayref of arrayrefs of [sentence index, arrayref of [bleu score, n-gram precisions], rank to be assigned]
-#return: none
-sub rankSentencesByBLEU
-{
- my $bleuData = shift;
- my $i = 0;
- #sort first on score, then on 1-gram accuracy, then on sentence index
- foreach my $sentenceData (reverse sort {my $c = $a->[1]->[0] <=> $b->[1]->[0]; if($c == 0) {my $d = $a->[1]->[1] <=> $b->[1]->[1]; if($d == 0) {$a->[0] cmp $b->[0];} else {$d;}} else {$c;}} @$bleuData) {$sentenceData->[2] = $i++;}
-}
-
-###############################################################################################################################################################
-
-#write HTML for a sentence containing factors (display words in a row)
-#arguments: sentence (arrayref of arrayrefs of factor strings), PWER results (hashref from word indices to 0/1 whether matched a truth word)
-#return: HTML string
-sub getFactoredSentenceHTML
-{
- my $sentence = shift;
- my $pwer = 0; if(scalar(@_) > 0) {$pwer = shift;}
- my $html = "<table class=\"sentence_table\"><tr>";
- for(my $i = 0; $i < scalar(@$sentence); $i++) #loop over words
- {
- my $style = ''; #default
- if($pwer ne '0' && $pwer->{$i} == 0) {$style = 'color: #cc0000; font-weight: bold';}
- $html .= "<td align=center style=\"$style\">" . join("<br>", @{$sentence->[$i]}) . "</td>";
- }
- return $html . "</tr></table>";
-}
-
-#arguments: arrayref of [sentence index, arrayref of [bleu score, n-gram precisions], rank], number of sentences
-#return: HTML color string
-sub getSentenceBGColorHTML
-{
- my ($scoreData, $numSentences) = @_;
- my $tier = int($scoreData->[2] / ($numSentences / scalar(@htmlColors))); #0..n-1
- return $htmlColors[$tier];
-}
-
-#display all matching n-grams in the given sentence, with all 1-grams on one line, then arranged by picking, for each, the first line on which it fits,
-# where a given word position can only be filled by one n-gram per line, so that all n-grams can be shown
-#arguments: sentence (arrayref of arrayrefs of factor strings), arrayref of arrayrefs of matching n-gram [start, length, arrayref of matching reference indices],
-# number of reference translations
-#return: HTML string
-sub getAllNgramsHTML
-{
- my ($sentence, $ngrams, $numTruths) = @_;
- my $factorIndex = 0;
-	my @table = (); #array of arrayrefs, each of which represents a line; each position has the index of the occupying n-gram, or -1 if none
- my $n = 0; #n-gram index
- foreach my $ngram (sort {$a->[0] <=> $b->[0]} @$ngrams)
- {
- #check for an open slot in an existing row
- my $foundRow = 0;
- my $r = 0;
- foreach my $row (@table)
- {
- if(rowIsClear($row, $ngram) == 1)
- {
- @{$row}[$ngram->[0] .. ($ngram->[0] + $ngram->[1] - 1)] = ($n) x $ngram->[1];
- push @$ngram, $r; #add row index
- $foundRow = 1;
- last;
- }
- $r++;
- }
- #add row if necessary
- if($foundRow == 0)
- {
- my @row = (-1) x scalar(@$sentence);
- @row[$ngram->[0] .. ($ngram->[0] + $ngram->[1] - 1)] = ($n) x $ngram->[1];
- push @$ngram, scalar(@table); #add row index
- push @table, \@row;
- }
- $n++;
- }
-
- my $html = "<table class=\"ngram_table\"><tr><td align=center>" . join("</td><td align=center>", map {$_->[$factorIndex]} @$sentence) . "</td></tr>";
-
- my $numWords = scalar(@$sentence);
- my ($curRow, $curCol) = (0, 0); #address in table
- $html .= "<tr>";
- foreach my $ngram (sort {my $c = $a->[3] <=> $b->[3]; if($c == 0) {$a->[0] <=> $b->[0]} else {$c}} @$ngrams) #sort by row, then word num
- {
- while($ngram->[0] > $curCol || $ngram->[3] > $curRow) {$html .= "<td></td>"; $curCol = ($curCol + 1) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;}}
- $html .= "<td colspan=" . $ngram->[1] . " align=center class=\"ngram_cell\" style=\"background: " . getNgramColorHTML(scalar(@{$ngram->[2]}), $numTruths) . "\">" . join(' ', map {$_->[$factorIndex]} @{$sentence}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) . "</td>";
- $curCol = ($curCol + $ngram->[1]) % $numWords; if($curCol == 0) {$html .= "</tr><tr>"; $curRow++;}
- }
- $html .= "</tr>";
-
- return $html . "</table>\n";
-}
-
-#auxiliary to getAllNgramsHTML(): check a table row for an empty piece at the right place for the given n-gram
-#arguments: row (arrayref of ints), n-gram (arrayref of [start index, length])
-#return: whether (0/1) row is clear
-sub rowIsClear
-{
- my ($row, $ngram) = @_;
- return (maxN(@{$row}[$ngram->[0] .. $ngram->[0] + $ngram->[1] - 1]) == -1) ? 1 : 0;
-}
-
-#auxiliary to getAllNgramsHTML()
-#arguments: number of reference translations matching the n-gram, total number of references
-#return: HTML color string
-sub getNgramColorHTML
-{
- my ($matches, $total) = @_;
- if($total == 1) {return $ngramSingleRefColor;}
- return $ngramMultirefColors[round($matches / $total * (scalar(@ngramMultirefColors) - 1))];
-}
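getBLEUSentenceDetails() above scores each output sentence as a brevity penalty times the geometric mean of its 1- to 4-gram precisions against the references. A compact numeric illustration of that formula follows; the counts and lengths are invented, and the snippet only mirrors the shape of the computation rather than the exact matching logic of the script:

    #!/usr/bin/perl
    # Worked example of the per-sentence BLEU shape computed above:
    # BLEU = BP * exp( mean of log n-gram precisions ), n = 1..4.
    use strict;
    use warnings;
    use List::Util qw(sum);

    my @correct = (6, 4, 3, 2);    # matching n-gram counts for n = 1..4 (made up)
    my @total   = (7, 6, 5, 4);    # candidate n-gram counts for n = 1..4
    my ($hyp_len, $ref_len) = (7, 8);   # hypothesis length, closest reference length

    my @prec     = map { $total[$_] ? $correct[$_] / $total[$_] : 0 } 0 .. 3;
    my $log_mean = sum(map { $_ > 0 ? log($_) : -9999 } @prec) / scalar(@prec);
    my $brevity  = ($hyp_len > $ref_len || $hyp_len == 0) ? 1 : exp(1 - $ref_len / $hyp_len);
    my $bleu     = $brevity * exp($log_mean);

    printf "precisions %s  BP %.4f  BLEU %.4f\n",
        join('/', map { sprintf '%.3f', $_ } @prec), $brevity, $bleu;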
diff --git a/scripts/analysis/smtgui/Corpus.pm b/scripts/analysis/smtgui/Corpus.pm
deleted file mode 100644
index 5a2753fdf..000000000
--- a/scripts/analysis/smtgui/Corpus.pm
+++ /dev/null
@@ -1,1311 +0,0 @@
-#package Corpus: hold a bunch of sentences in any language, with translation factors and stats about individual sentences and the corpus as a whole
-#Evan Herbst, 7 / 25 / 06
-
-package Corpus;
-BEGIN
-{
- push @INC, "../perllib"; #for Error.pm
-}
-use Error;
-
-return 1;
-
-###########################################################################################################################
-
-##### 'our' variables are available outside the package #####
-#all factor names used should be in this list, just in case
-our @FACTORNAMES = ('surf', 'pos', 'lemma', 'stem', 'morph');
-
-#constructor
-#arguments: short corpus name (-name), hashref of filenames to descriptions (-descriptions), formatted string with various config info (-info_line)
-sub new
-{
- my $class = shift;
- my %args = @_; #turn the remainder of @_ into a hash
- my ($corpusName, $refFileDescs, $infoLine) = ($args{'-name'}, $args{'-descriptions'}, $args{'-info_line'});
- my ($factorList, $inputLingmodels, $outputLingmodels) = split(/\s*:\s*/, $infoLine);
- my $self = {};
- $self->{'corpusName'} = $corpusName;
- $self->{'truth'} = []; #arrayref of arrayrefs of factors
- $self->{'input'} = []; #same; also same for any system outputs that get loaded
- $self->{'tokenCount'} = {}; #sysname => number of tokens in file
- $self->{'truthFilename'} = "";
- $self->{'inputFilename'} = "";
- $self->{'sysoutFilenames'} = {}; #hashref of (string => string) for (system name, filename)
- $self->{'phraseTableFilenames'} = {}; #factor name => filename
- $self->{'fileCtimes'} = {}; #file ID of some kind => changetime in seconds
- $self->{'factorIndices'} = {}; #factor name => index
- my @factors = split(/\s+/, $factorList);
- for(my $i = 0; $i < scalar(@factors); $i++)
- {
- $self->{'factorIndices'}->{$factors[$i]} = $i;
- }
- $self->{'inputLMs'} = {}; #factor name => lingmodel filename
- $self->{'outputLMs'} = {};
- foreach my $lmInfo (split(/\s*,\s*/, $inputLingmodels))
- {
- my @tokens = split(/\s+/, $lmInfo);
- $self->{'inputLMs'}->{$tokens[0]} = $tokens[1];
- }
- foreach my $lmInfo (split(/\s*,\s*/, $outputLingmodels))
- {
- my @tokens = split(/\s+/, $lmInfo);
- $self->{'outputLMs'}->{$tokens[0]} = $tokens[1];
- }
- $self->{'phraseTables'} = {}; #factor name (from @FACTORNAMES) => hashref of source phrases to anything; used for unknown-word counting
- $self->{'unknownCount'} = {}; #factor name => count of unknown tokens in input
- $self->{'sysoutWER'} = {}; #system name => (factor name => arrayref with system output total WER and arrayref of WER scores for individual sysout sentences wrt truth)
- $self->{'sysoutPWER'} = {}; #similarly
- $self->{'nnAdjWERPWER'} = {}; #system name => arrayref of [normalized WER, normalized PWER]
- $self->{'perplexity'} = {}; #system name => (factor name => perplexity raw score)
- $self->{'fileDescriptions'} = {}; #filename associated with us => string description of file
- $self->{'bleuScores'} = {}; #system name => (factor name => arrayref of (overall score, arrayref of per-sentence scores) )
- $self->{'bleuConfidence'} = {}; #system name => (factor name => arrayrefs holding statistical test data on BLEU scores)
- $self->{'subsetBLEUstats'} = {}; #system name => (factor name => n-gram precisions and lengths for independent corpus subsets)
- $self->{'comparisonStats'} = {}; #system name 1 => (system name 2 => (factor name => p-values, and indices of better system, for all tests used))
- $self->{'cacheFilename'} = "cache/$corpusName.cache"; #all memory of various scores is stored here
- bless $self, $class;
- $self->locateFiles($refFileDescs); #find all relevant files in the current directory; set filenames and descriptions
- $self->loadCacheFile();
- return $self;
-}
-
-#arguments: filename
-#return: description string
-#throw if filename doesn't belong to this corpus
-sub getFileDescription
-{
- my ($self, $filename) = @_;
- if(!defined($self->{'fileDescriptions'}->{$filename}))
- {
- throw Error::Simple(-text => "Corpus::getFileDescription(): invalid filename '$filename'\n");
- }
- return $self->{'fileDescriptions'}->{$filename};
-}
-
-#arguments: none
-#return: list of system names (NOT including 'input', 'truth' and other special cases)
-sub getSystemNames
-{
- my $self = shift;
- return keys %{$self->{'sysoutFilenames'}};
-}
-
-#calculate the number of unknown factor values for the given factor in the input file
-#arguments: factor name
-#return: unknown factor count, total factor count (note the total doesn't depend on the factor)
-#throw if we don't have an input file or a phrase table for the given factor defined or if there's no index known for the given factor
-sub calcUnknownTokens
-{
- my ($self, $factorName) = @_;
- #check in-memory cache first
- if(defined($self->{'unknownCount'}->{$factorName}))
- {
- return ($self->{'unknownCount'}->{$factorName}, $self->{'tokenCount'}->{'input'});
- }
- warn "calcing unknown tokens\n";
-
- $self->ensureFilenameDefined('input');
- $self->ensurePhraseTableDefined($factorName);
- $self->ensureFactorPosDefined($factorName);
- $self->loadSentences('input', $self->{'inputFilename'});
- $self->loadPhraseTable($factorName);
-
- #count unknown and total words
- my ($unknownTokens, $totalTokens) = (0, 0);
- my $factorIndex = $self->{'factorIndices'}->{$factorName};
- foreach my $sentence (@{$self->{'input'}})
- {
- $totalTokens += scalar(@$sentence);
- foreach my $word (@$sentence)
- {
- if(!defined($self->{'phraseTables'}->{$factorName}->{$word->[$factorIndex]}))
- {
- $unknownTokens++;
- }
- }
- }
- $self->{'unknownCount'}->{$factorName} = $unknownTokens;
- $self->{'tokenCount'}->{'input'} = $totalTokens;
-
- return ($unknownTokens, $totalTokens);
-}
-
-#arguments: system name
-#return: (WER, PWER) for nouns and adjectives in given system wrt truth
-#throw if given system or truth is not set or if index of 'surf' or 'pos' hasn't been specified
-sub calcNounAdjWER_PWERDiff
-{
- my ($self, $sysname) = @_;
- #check in-memory cache first
- if(exists $self->{'nnAdjWERPWER'}->{$sysname})
- {
- return @{$self->{'nnAdjWERPWER'}->{$sysname}};
- }
- warn "calcing NN/JJ PWER/WER\n";
-
- $self->ensureFilenameDefined('truth');
- $self->ensureFilenameDefined($sysname);
- $self->ensureFactorPosDefined('surf');
- $self->ensureFactorPosDefined('pos');
- $self->loadSentences('truth', $self->{'truthFilename'});
- $self->loadSentences($sysname, $self->{'sysoutFilenames'}->{$sysname});
- #find nouns and adjectives and score them
- my ($werScore, $pwerScore) = (0, 0);
- my $nnNadjTags = $self->getPOSTagList('nounAndAdj');
- for(my $i = 0; $i < scalar(@{$self->{'truth'}}); $i++)
- {
- my @nnAdjEWords = $self->filterFactors($self->{'truth'}->[$i], $self->{'factorIndices'}->{'pos'}, $nnNadjTags);
- my @nnAdjSWords = $self->filterFactors($self->{$sysname}->[$i], $self->{'factorIndices'}->{'pos'}, $nnNadjTags);
- my ($sentWer, $tmp) = $self->sentenceWER(\@nnAdjSWords, \@nnAdjEWords, $self->{'factorIndices'}->{'surf'});
- $werScore += $sentWer;
- ($sentWer, $tmp) = $self->sentencePWER(\@nnAdjSWords, \@nnAdjEWords, $self->{'factorIndices'}->{'surf'});
- $pwerScore += $sentWer;
- }
-
- #unhog memory
- $self->releaseSentences('truth');
- $self->releaseSentences($sysname);
- $self->{'nnAdjWERPWER'}->{$sysname} = [$werScore / $self->{'tokenCount'}->{'truth'}, $pwerScore / $self->{'tokenCount'}->{'truth'}];
- return @{$self->{'nnAdjWERPWER'}->{$sysname}};
-}
-
-#calculate detailed WER statistics and put them into $self
-#arguments: system name, factor name to consider (default 'surf', surface form)
-#return: overall surface WER for given system (w/o filtering)
-#throw if given system or truth is not set or if index of factor name hasn't been specified
-sub calcOverallWER
-{
- my ($self, $sysname, $factorName) = (shift, shift, 'surf');
- if(scalar(@_) > 0) {$factorName = shift;}
- #check in-memory cache first
- if(defined($self->{'sysoutWER'}->{$sysname}->{$factorName}))
- {
- return $self->{'sysoutWER'}->{$sysname}->{$factorName}->[0];
- }
- warn "calcing WER\n";
-
- $self->ensureFilenameDefined('truth');
- $self->ensureFilenameDefined($sysname);
- $self->ensureFactorPosDefined($factorName);
- $self->loadSentences('truth', $self->{'truthFilename'});
- $self->loadSentences($sysname, $self->{'sysoutFilenames'}->{$sysname});
-
- my ($wer, $swers, $indices) = $self->corpusWER($self->{$sysname}, $self->{'truth'}, $self->{'factorIndices'}->{$factorName});
- $self->{'sysoutWER'}->{$sysname}->{$factorName} = [$wer, $swers, $indices]; #total; arrayref of scores for individual sentences; arrayref of arrayrefs of offending words in each sentence
-
- #unhog memory
- $self->releaseSentences('truth');
- $self->releaseSentences($sysname);
- return $self->{'sysoutWER'}->{$sysname}->{$factorName}->[0] / $self->{'tokenCount'}->{'truth'};
-}
-
-#calculate detailed PWER statistics and put them into $self
-#arguments: system name, factor name to consider (default 'surf')
-#return: overall surface PWER for given system (w/o filtering)
-#throw if given system or truth is not set or if index of factor name hasn't been specified
-sub calcOverallPWER
-{
- my ($self, $sysname, $factorName) = (shift, shift, 'surf');
- if(scalar(@_) > 0) {$factorName = shift;}
- #check in-memory cache first
- if(defined($self->{'sysoutPWER'}->{$sysname}->{$factorName}))
- {
- return $self->{'sysoutPWER'}->{$sysname}->{$factorName}->[0];
- }
- warn "calcing PWER\n";
-
- $self->ensureFilenameDefined('truth');
- $self->ensureFilenameDefined($sysname);
- $self->ensureFactorPosDefined($factorName);
- $self->loadSentences('truth', $self->{'truthFilename'});
- $self->loadSentences($sysname, $self->{'sysoutFilenames'}->{$sysname});
-
- my ($pwer, $spwers, $indices) = $self->corpusPWER($self->{$sysname}, $self->{'truth'}, $self->{'factorIndices'}->{$factorName});
- $self->{'sysoutPWER'}->{$sysname}->{$factorName} = [$pwer, $spwers, $indices]; #total; arrayref of scores for individual sentences; arrayref of arrayrefs of offending words in each sentence
-
- #unhog memory
- $self->releaseSentences('truth');
- $self->releaseSentences($sysname);
- return $self->{'sysoutPWER'}->{$sysname}->{$factorName}->[0] / $self->{'tokenCount'}->{'truth'};
-}
-
-#arguments: system name, factor name to consider (default 'surf')
-#return: BLEU score, n-gram precisions, brevity penalty
-sub calcBLEU
-{
- my ($self, $sysname, $factorName) = (shift, shift, 'surf');
- if(scalar(@_) > 0) {$factorName = shift;}
- #check in-memory cache first
- if(exists $self->{'bleuScores'}->{$sysname} && exists $self->{'bleuScores'}->{$sysname}->{$factorName})
- {
-		return @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[0]}; #return the same list of overall scores as the fresh computation below
- }
- warn "calcing BLEU\n";
-
- $self->ensureFilenameDefined('truth');
- $self->ensureFilenameDefined($sysname);
- $self->ensureFactorPosDefined($factorName);
- $self->loadSentences('truth', $self->{'truthFilename'});
- $self->loadSentences($sysname, $self->{'sysoutFilenames'}->{$sysname});
-
- #score structure: various total scores, arrayref of by-sentence score arrays
- if(!exists $self->{'bleuScores'}->{$sysname}) {$self->{'bleuScores'}->{$sysname} = {};}
- if(!exists $self->{'bleuScores'}->{$sysname}->{$factorName}) {$self->{'bleuScores'}->{$sysname}->{$factorName} = [[], []];}
-
- my ($good1, $tot1, $good2, $tot2, $good3, $tot3, $good4, $tot4, $totCLength, $totRLength) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- my $factorIndex = $self->{'factorIndices'}->{$factorName};
- for(my $i = 0; $i < scalar(@{$self->{'truth'}}); $i++)
- {
- my ($truthSentence, $sysoutSentence) = ($self->{'truth'}->[$i], $self->{$sysname}->[$i]);
- my ($unigood, $unicount, $bigood, $bicount, $trigood, $tricount, $quadrugood, $quadrucount, $cLength, $rLength) =
- $self->sentenceBLEU($truthSentence, $sysoutSentence, $factorIndex, 0); #last argument is whether to debug-print
- push @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[1]}, [$unigood, $unicount, $bigood, $bicount, $trigood, $tricount, $quadrugood, $quadrucount, $cLength, $rLength];
- $good1 += $unigood; $tot1 += $unicount;
- $good2 += $bigood; $tot2 += $bicount;
- $good3 += $trigood; $tot3 += $tricount;
- $good4 += $quadrugood; $tot4 += $quadrucount;
- $totCLength += $cLength;
- $totRLength += $rLength;
- }
- my $brevity = ($totCLength > $totRLength || $totCLength == 0) ? 1 : exp(1 - $totRLength / $totCLength);
- my ($pct1, $pct2, $pct3, $pct4) = ($tot1 == 0 ? -1 : $good1 / $tot1, $tot2 == 0 ? -1 : $good2 / $tot2,
- $tot3 == 0 ? -1 : $good3 / $tot3, $tot4 == 0 ? -1 : $good4 / $tot4);
- my ($logsum, $logcount) = (0, 0);
- if($tot1 > 0) {$logsum += my_log($pct1); $logcount++;}
- if($tot2 > 0) {$logsum += my_log($pct2); $logcount++;}
- if($tot3 > 0) {$logsum += my_log($pct3); $logcount++;}
- if($tot4 > 0) {$logsum += my_log($pct4); $logcount++;}
- my $bleu = $brevity * exp($logsum / $logcount);
- $self->{'bleuScores'}->{$sysname}->{$factorName}->[0] = [$bleu, 100 * $pct1, 100 * $pct2, 100 * $pct3, 100 * $pct4, $brevity];
-
- #unhog memory
- $self->releaseSentences('truth');
- $self->releaseSentences($sysname);
- return @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[0]};
-}
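-
-#Worked example (illustrative numbers only, not taken from any real run): with n-gram precisions
-#p1..p4 = 0.6, 0.4, 0.3, 0.2 and no brevity penalty (candidate at least as long as the reference),
-#BLEU = exp((ln 0.6 + ln 0.4 + ln 0.3 + ln 0.2) / 4) ~= 0.35, i.e. the geometric mean of the four
-#precisions scaled by the brevity penalty computed above.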
-
-#do t-tests on the whole-corpus n-gram precisions vs. the average precisions over a set number of disjoint subsets
-#arguments: system name, factor name BLEU was run on (default 'surf')
-#return: arrayref of [arrayref of p-values for overall precision vs. subset average, arrayrefs of [(lower, upper) 95% credible intervals for true overall n-gram precisions]]
-#
-#written to try to save memory
-sub statisticallyTestBLEUResults
-{
- my ($self, $sysname, $factorName) = (shift, shift, 'surf');
- if(scalar(@_) > 0) {$factorName = shift;}
- #check in-memory cache first
- if(exists $self->{'bleuConfidence'}->{$sysname} && exists $self->{'bleuConfidence'}->{$sysname}->{$factorName})
- {
- return $self->{'bleuConfidence'}->{$sysname}->{$factorName};
- }
- warn "performing consistency tests\n";
-
- my $k = 30; #HARDCODED NUMBER OF SUBSETS (WE DO k-FOLD CROSS-VALIDATION); IF YOU CHANGE THIS YOU MUST ALSO CHANGE getApproxPValue() and $criticalTStat
- my $criticalTStat = 2.045; #hardcoded value given alpha (.025 here) and degrees of freedom (= $k - 1) ########################################
- $self->ensureFilenameDefined('truth');
- $self->ensureFilenameDefined($sysname);
- $self->ensureFactorPosDefined($factorName);
-
- #ensure we have full-corpus BLEU results
- if(!exists $self->{'bleuScores'}->{$sysname}->{$factorName})
- {
- $self->calcBLEU($sysname, $factorName);
- }
- if(!exists $self->{'subsetBLEUstats'}->{$sysname}) {$self->{'subsetBLEUstats'}->{$sysname} = {};}
- if(!exists $self->{'subsetBLEUstats'}->{$sysname}->{$factorName}) {$self->{'subsetBLEUstats'}->{$sysname}->{$factorName} = [];}
-
- #calculate n-gram precisions for each small subset
- my @sentenceStats = @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[1]};
- for(my $i = 0; $i < $k; $i++)
- {
- my ($good1, $tot1, $good2, $tot2, $good3, $tot3, $good4, $tot4, $sysoutLength, $truthLength) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
- for(my $j = $i; $j < scalar(@sentenceStats); $j += $k) #subset #K consists of every Kth sentence
- {
- $good1 += $sentenceStats[$j]->[0]; $tot1 += $sentenceStats[$j]->[1];
- $good2 += $sentenceStats[$j]->[2]; $tot2 += $sentenceStats[$j]->[3];
- $good3 += $sentenceStats[$j]->[4]; $tot3 += $sentenceStats[$j]->[5];
- $good4 += $sentenceStats[$j]->[6]; $tot4 += $sentenceStats[$j]->[7];
- $sysoutLength += $sentenceStats[$j]->[8];
- $truthLength += $sentenceStats[$j]->[9];
- }
- push @{$self->{'subsetBLEUstats'}->{$sysname}->{$factorName}}, [$good1, $tot1, $good2, $tot2, $good3, $tot3, $good4, $tot4, $sysoutLength, $truthLength];
- }
- my $subsetStats = $self->{'subsetBLEUstats'}->{$sysname}->{$factorName};
- #calculate first two moments for subset scores for each n-gram precision, and t statistic
- my $fullCorpusBLEU = $self->{'bleuScores'}->{$sysname}->{$factorName}->[0]; #an arrayref
- my @means = (0) x 4;
- my @devs = (0) x 4;
- my $t = []; #t statistics for all n-gram orders
- if(!exists $self->{'bleuConfidence'}->{$sysname}) {$self->{'bleuConfidence'}->{$sysname} = {};}
- $self->{'bleuConfidence'}->{$sysname}->{$factorName} = [[], []]; #lower-bound p-values for whole corpus vs. subset average; confidence intervals for all n-gram orders
- for(my $i = 0; $i < 4; $i++) #run through n-gram orders
- {
- for(my $j = 0; $j < $k; $j++) #run through subsets
- {
- $means[$i] += $subsetStats->[$j]->[2 * $i] / $subsetStats->[$j]->[2 * $i + 1]; #matching / total n-grams
- }
- $means[$i] /= $k;
- for(my $j = 0; $j < $k; $j++) #run through subsets
- {
- $devs[$i] += ($subsetStats->[$j]->[2 * $i] / $subsetStats->[$j]->[2 * $i + 1] - $means[$i]) ** 2;
- }
- $devs[$i] = sqrt($devs[$i] / ($k - 1));
- $t->[$i] = ($fullCorpusBLEU->[$i + 1] / 100 - $means[$i]) / $devs[$i];
- push @{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[0]}, getLowerBoundPValue($t->[$i]); #p-value for overall score vs. subset average
-# warn "$i: mean " . $means[$i] . ", dev " . $devs[$i] . ", t " . $t->[$i] . ", conf " . getLowerBoundPValue($t->[$i]) . "\n";
- push @{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[1]},
- [$means[$i] - $criticalTStat * $devs[$i] / sqrt($k), $means[$i] + $criticalTStat * $devs[$i] / sqrt($k)]; #the confidence interval
- }
-
- return $self->{'bleuConfidence'}->{$sysname}->{$factorName};
-}
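-
-#Illustrative example (made-up numbers): with k = 30 subsets, a subset-mean precision of 0.60, a
-#sample standard deviation of 0.02 and the hardcoded critical t of 2.045, the interval computed
-#above is 0.60 +/- 2.045 * 0.02 / sqrt(30) ~= [0.593, 0.607].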
-
-#arguments: system name, factor name
-#return: perplexity of language model (specified in a config file) wrt given system output
-sub calcPerplexity
-{
- my ($self, $sysname, $factorName) = @_;
- #check in-memory cache first
- if(exists $self->{'perplexity'}->{$sysname} && exists $self->{'perplexity'}->{$sysname}->{$factorName})
- {
- return $self->{'perplexity'}->{$sysname}->{$factorName};
- }
- warn "calcing perplexity\n";
-
- $self->ensureFilenameDefined($sysname);
- my $sysoutFilename;
- if($sysname eq 'truth' || $sysname eq 'input') {$sysoutFilename = $self->{"${sysname}Filename"};}
- else {$sysoutFilename = $self->{'sysoutFilenames'}->{$sysname};}
- my $lmFilename;
- if($sysname eq 'input') {$lmFilename = $self->{'inputLMs'}->{$factorName};}
- else {$lmFilename = $self->{'outputLMs'}->{$factorName};}
- my $tmpfile = ".tmp" . time;
- my $cmd = "perl ./extract-factors.pl $sysoutFilename " . $self->{'factorIndices'}->{$factorName} . " > $tmpfile";
- `$cmd`; #extract just the factor we're interested in; ngram doesn't understand factored notation
- my @output = `./ngram -lm $lmFilename -ppl $tmpfile`; #run the SRI n-gram tool
- `rm $tmpfile`;
- $output[1] =~ /ppl1=\s*([0-9\.]+)/;
-	$self->{'perplexity'}->{$sysname}->{$factorName} = $1; #store per factor so the cached lookup above and the return below find it
- return $self->{'perplexity'}->{$sysname}->{$factorName};
-}
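-
-#Note: the regex above relies on the second line of SRILM's `ngram -ppl` output, which normally
-#looks something like "0 zeroprobs, logprob= -12345.6 ppl= 123.4 ppl1= 156.7" (illustrative values);
-#ppl1 is the perplexity excluding end-of-sentence tokens, and that is the number that gets cached.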
-
-#run a paired t test and a sign test on BLEU statistics for subsets of both systems' outputs
-#arguments: system name 1, system name 2, factor name
-#return: arrayref of [arrayref of confidence levels for t test at which results differ, arrayref of index (0/1) of better system by t test,
-# arrayref of confidence levels for sign test at which results differ, arrayref of index (0/1) of better system by sign test],
-# where each inner arrayref has one element per n-gram order considered
-sub statisticallyCompareSystemResults
-{
- my ($self, $sysname1, $sysname2, $factorName) = @_;
- #check in-memory cache first
- if(exists $self->{'comparisonStats'}->{$sysname1} && exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}
- && exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName})
- {
- return $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName};
- }
- warn "comparing sysoutputs\n";
-
- $self->ensureFilenameDefined($sysname1);
- $self->ensureFilenameDefined($sysname2);
- $self->ensureFactorPosDefined($factorName);
- #make sure we have tallied results for both systems
- if(!exists $self->{'subsetBLEUstats'}->{$sysname1}->{$factorName}) {$self->statisticallyTestBLEUResults($sysname1, $factorName);}
- if(!exists $self->{'subsetBLEUstats'}->{$sysname2}->{$factorName}) {$self->statisticallyTestBLEUResults($sysname2, $factorName);}
-
- if(!exists $self->{'comparisonStats'}->{$sysname1}) {$self->{'comparisonStats'}->{$sysname1} = {};}
- if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2} = {};}
- if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = [];}
- my ($tConfidences, $tWinningIndices, $signConfidences, $signWinningIndices) = ([], [], [], []);
- for(my $i = 0; $i < 4; $i++) #loop over n-gram order
- {
- #t-test stats
- my ($mean, $dev) = (0, 0); #of the difference between the first and second systems' precisions
- #sign-test stats
- my ($nPlus, $nMinus) = (0, 0);
- my $j;
- for($j = 0; $j < scalar(@{$self->{'subsetBLEUstats'}->{$sysname1}->{$factorName}}); $j++)
- {
- my ($stats1, $stats2) = ($self->{'subsetBLEUstats'}->{$sysname1}->{$factorName}->[$j], $self->{'subsetBLEUstats'}->{$sysname2}->{$factorName}->[$j]);
- my ($prec1, $prec2) = ($stats1->[2 * $i] / $stats1->[2 * $i + 1], $stats2->[2 * $i] / $stats2->[2 * $i + 1]); #n-gram precisions
- $mean += $prec1 - $prec2;
- if($prec1 > $prec2) {$nPlus++;} else {$nMinus++;}
- }
- $mean /= $j;
- for($j = 0; $j < scalar(@{$self->{'subsetBLEUstats'}->{$sysname1}->{$factorName}}); $j++)
- {
- my ($stats1, $stats2) = ($self->{'subsetBLEUstats'}->{$sysname1}->{$factorName}->[$j], $self->{'subsetBLEUstats'}->{$sysname2}->{$factorName}->[$j]);
- my ($prec1, $prec2) = ($stats1->[2 * $i] / $stats1->[2 * $i + 1], $stats2->[2 * $i] / $stats2->[2 * $i + 1]); #n-gram precisions
- $dev += ($prec1 - $prec2 - $mean) ** 2;
- }
- $dev = sqrt($dev / (($j - 1) * $j)); #need the extra j because the variance of Xbar is 1/n the variance of X
- #t test
- my $t = $mean / $dev; #this isn't the standard form; remember the difference of the means is equal to the mean of the differences
- push @$tConfidences, getUpperBoundPValue($t);
- push @$tWinningIndices, ($mean > 0) ? 0 : 1;
- #sign test
-		my %binomialCoefficients; #map k (count of '+' outcomes) to its probability under a fair coin; computed on the fly
- for(my $k = 0; $k <= $nPlus + $nMinus; $k++)
- {
- $binomialCoefficients{$k} = binCoeff($nPlus + $nMinus, $k);
- }
- my $sumCoeffs = 0;
- foreach my $coeff (values %binomialCoefficients) #get a lower bound on the probability mass inside (n+ - n-)
- {
- if($coeff > $binomialCoefficients{$nPlus}) {$sumCoeffs += $coeff;}
- }
- push @$signConfidences, $sumCoeffs;
- push @$signWinningIndices, ($nPlus > $nMinus) ? 0 : 1;
- }
- $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = [$tConfidences, $tWinningIndices, $signConfidences, $signWinningIndices];
- return $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName};
-}
-
-#write HTML to be displayed to compare the various versions we have of each sentence in the corpus;
-#allow to filter which versions will be displayed
-#(we don't write the whole page, just the contents of the body)
-#arguments: filehandleref to which to write, regex to filter filename extensions to be included
-#return: none
-sub writeComparisonPage
-{
- my ($self, $fh, $filter) = @_;
-	my @filteredExtensions = grep(/$filter/, ('e', 'f', keys %{$self->{'sysoutFilenames'}})); #treat $filter as a regex; a bare $filter is always true and would filter nothing
- my %openedFiles = $self->openFiles(@filteredExtensions);
- my $id = 1; #sentence ID string
- while(my %lines = $self->readLineFromFiles(%openedFiles))
- {
- $self->printSingleSentenceComparison($fh, $id, %lines);
- $id++;
- }
- $self->closeFiles(%openedFiles);
-}
-
-##########################################################################################################
-##### INTERNAL ###################################################################################
-##########################################################################################################
-
-#destructor!
-#arguments: none
-#return: none
-sub DESTROY
-{
- my $self = shift;
- $self->writeCacheFile();
-}
-
-#write all scores in memory to disk
-#arguments: none
-#return: none
-sub writeCacheFile
-{
- my $self = shift;
- if(!open(CACHEFILE, ">" . $self->{'cacheFilename'}))
- {
- warn "Corpus::writeCacheFile(): can't open '" . $self->{'cacheFilename'} . "' for write\n";
- return;
- }
-
- #store file changetimes to disk
- print CACHEFILE "File changetimes\n";
- my $ensureCtimeIsOutput = sub
- {
- my $ext = shift;
- #check for a previously read value
- if(exists $self->{'fileCtimes'}->{$ext}) {print CACHEFILE $self->{'corpusName'} . ".$ext " . $self->{'fileCtimes'}->{$ext} . "\n";}
- else {print CACHEFILE $self->{'corpusName'} . ".$ext " . time . "\n";}
- };
- if(exists $self->{'truthFilename'}) {&$ensureCtimeIsOutput('e');}
- if(exists $self->{'inputFilename'}) {&$ensureCtimeIsOutput('f');}
- foreach my $factorName (keys %{$self->{'phraseTableFilenames'}}) {&$ensureCtimeIsOutput("pt_$factorName");}
- foreach my $sysname (keys %{$self->{'sysoutFilenames'}}) {&$ensureCtimeIsOutput($sysname);}
- #store bleu scores to disk
- print CACHEFILE "\nBLEU scores\n";
- foreach my $sysname (keys %{$self->{'bleuScores'}})
- {
- foreach my $factorName (keys %{$self->{'bleuScores'}->{$sysname}})
- {
- print CACHEFILE "$sysname $factorName " . join(' ', @{$self->{'bleuScores'}->{$sysname}->{$factorName}->[0]});
- foreach my $sentenceBLEU (@{$self->{'bleuScores'}->{$sysname}->{$factorName}->[1]})
- {
- print CACHEFILE "; " . join(' ', @$sentenceBLEU);
- }
- print CACHEFILE "\n";
- }
- }
- #store t statistics for overall BLEU score and subsets in k-fold cross-validation
- print CACHEFILE "\nBLEU statistics\n";
- foreach my $sysname (keys %{$self->{'bleuConfidence'}})
- {
- foreach my $factorName (keys %{$self->{'bleuConfidence'}->{$sysname}})
- {
- print CACHEFILE "$sysname $factorName " . join(' ', @{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[0]});
- foreach my $subsetConfidence (@{$self->{'bleuConfidence'}->{$sysname}->{$factorName}->[1]})
- {
- print CACHEFILE "; " . join(' ', @$subsetConfidence);
- }
- print CACHEFILE "\n";
- }
- }
- #store statistics comparing system outputs
- print CACHEFILE "\nStatistical comparisons\n";
- foreach my $sysname1 (keys %{$self->{'comparisonStats'}})
- {
- foreach my $sysname2 (keys %{$self->{'comparisonStats'}->{$sysname1}})
- {
- foreach my $factorName (keys %{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}})
- {
- print CACHEFILE "$sysname1 $sysname2 $factorName " . join('; ', map {join(' ', @$_)} @{$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}}) . "\n";
- }
- }
- }
- #store unknown-token counts to disk
- print CACHEFILE "\nUnknown-token counts\n";
- foreach my $factorName (keys %{$self->{'unknownCount'}})
- {
- print CACHEFILE $factorName . " " . $self->{'phraseTableFilenames'}->{$factorName} . " " . $self->{'unknownCount'}->{$factorName} . " " . $self->{'tokenCount'}->{'input'} . "\n";
- }
- #store WER, PWER to disk
- print CACHEFILE "\nWER scores\n";
- my $printWERFunc =
- sub
- {
- my $werType = shift;
- foreach my $sysname (keys %{$self->{$werType}})
- {
- foreach my $factorName (keys %{$self->{$werType}->{$sysname}})
- {
- my ($totalWER, $sentenceWERs, $errorWords) = @{$self->{$werType}->{$sysname}->{$factorName}};
- print CACHEFILE "$werType $sysname $factorName $totalWER " . join(' ', @$sentenceWERs);
- foreach my $indices (@$errorWords)
- {
- print CACHEFILE ";" . join(' ', @$indices);
- }
- print CACHEFILE "\n";
- }
- }
- };
- &$printWERFunc('sysoutWER');
- &$printWERFunc('sysoutPWER');
- #store corpus perplexities to disk
- print CACHEFILE "\nPerplexity\n";
- foreach my $sysname (keys %{$self->{'perplexity'}})
- {
- foreach my $factorName (keys %{$self->{'perplexity'}->{$sysname}})
- {
- print CACHEFILE "$sysname $factorName " . $self->{'perplexity'}->{$sysname}->{$factorName} . "\n";
- }
- }
-	print CACHEFILE "\nNN/ADJ WER/PWER\n"; #this section header belongs in the cache file, not on stdout
-	foreach my $sysname (keys %{$self->{'nnAdjWERPWER'}})
-	{
-		print CACHEFILE "$sysname " . join(' ', @{$self->{'nnAdjWERPWER'}->{$sysname}}) . "\n";
-	}
-	print CACHEFILE "\n";
- close(CACHEFILE);
-}
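-
-#For reference, the cache file written above is plain text with one named section per score type;
-#a sketch (corpus, system and field values below are made up) looks roughly like:
-#  File changetimes
-#  mycorpus.e 1155600000
-#
-#  BLEU scores
-#  pharaoh surf 0.35 60 40 30 20 1; 12 20 8 19 5 18 3 17 20 22; ...
-#
-#  Perplexity
-#  pharaoh surf 156.7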
-
-#load all scores present in the cache file into the appropriate fields of $self
-#arguments: none
-#return: none
-sub loadCacheFile
-{
- my $self = shift;
- if(!open(CACHEFILE, "<" . $self->{'cacheFilename'}))
- {
- warn "Corpus::loadCacheFile(): can't open '" . $self->{'cacheFilename'} . "' for read\n";
- return;
- }
- my $mode = 'none';
- while(my $line = <CACHEFILE>)
- {
-		next if $line =~ /^\s*$/; #skip blank lines
-		chomp $line; #strip the newline before the section-name comparisons below
-		#check for start of section
-		if($line eq "File changetimes") {$mode = 'ctime';}
-		elsif($line eq "BLEU scores") {$mode = 'bleu';}
-		elsif($line eq "BLEU statistics") {$mode = 'bstats';}
-		elsif($line eq "Statistical comparisons") {$mode = 'cmp';}
-		elsif($line eq "Unknown-token counts") {$mode = 'unk';}
-		elsif($line eq "WER scores") {$mode = 'wer';}
-		elsif($line eq "Perplexity") {$mode = 'ppl';}
-		elsif($line eq "NN/ADJ WER/PWER") {$mode = 'nawp';}
- #get data when in a mode already
- elsif($mode eq 'ctime')
- {
-			my ($fileExtension, $ctime) = split(/\s+/, $line);
- $self->{'fileCtimes'}->{$fileExtension} = $ctime;
- }
- elsif($mode eq 'bleu')
- {
-			my ($sysname, $factorName, $rest) = split(/\s+/, $line, 3);
- if(!$self->cacheIsCurrentForFile($sysname) || !$self->cacheIsCurrentForFile('e')) {next;}
- if(!exists $self->{'bleuScores'}->{$sysname}) {$self->{'bleuScores'}->{$sysname} = {};}
- if(!exists $self->{'bleuScores'}->{$sysname}->{$factorName}) {$self->{'bleuScores'}->{$sysname}->{$factorName} = [[], []];}
- my @stats = map {my @tmp = split(/\s+/, $_); \@tmp;} split(/;/, $rest);
- $self->{'bleuScores'}->{$sysname}->{$factorName}->[0] = shift @stats;
- $self->{'bleuScores'}->{$sysname}->{$factorName}->[1] = \@stats;
- }
- elsif($mode eq 'bstats')
- {
-			my ($sysname, $factorName, $rest) = split(/\s+/, $line, 3);
- if(!$self->cacheIsCurrentForFile($sysname) || !$self->cacheIsCurrentForFile('e')) {next;}
- if(!exists $self->{'bleuConfidence'}->{$sysname}) {$self->{'bleuConfidence'}->{$sysname} = {};}
- if(!exists $self->{'bleuConfidence'}->{$sysname}->{$factorName}) {$self->{'bleuConfidence'}->{$sysname}->{$factorName} = [[], []];}
- my @stats = map {my @tmp = split(/\s+/, $_); \@tmp;} split(/;/, $rest);
- $self->{'bleuConfidence'}->{$sysname}->{$factorName}->[0] = shift @stats;
- $self->{'bleuConfidence'}->{$sysname}->{$factorName}->[1] = \@stats;
- }
- elsif($mode eq 'cmp')
- {
-			my ($sysname1, $sysname2, $factorName, $rest) = split(/\s+/, $line, 4);
- if(!$self->cacheIsCurrentForFile($sysname1) || !$self->cacheIsCurrentForFile($sysname2) || !$self->cacheIsCurrentForFile('e')) {next;}
- if(!exists $self->{'comparisonStats'}->{$sysname1}) {$self->{'comparisonStats'}->{$sysname1} = {};}
- if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2} = {};}
- if(!exists $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName}) {$self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = [];}
- my @stats = map {my @x = split(' ', $_); \@x} split(/;/, $rest);
- $self->{'comparisonStats'}->{$sysname1}->{$sysname2}->{$factorName} = \@stats;
- }
- elsif($mode eq 'unk')
- {
-			my ($factorName, $phraseTableFilename, $unknownCount, $totalCount) = split(' ', $line);
-			if(!$self->cacheIsCurrentForFile('f') || !$self->cacheIsCurrentForFile("pt_$factorName")) {next;}
-			if(defined($self->{'phraseTableFilenames'}->{$factorName}) && $self->{'phraseTableFilenames'}->{$factorName} eq $phraseTableFilename)
-			{
-				$self->{'unknownCount'}->{$factorName} = $unknownCount;
-				$self->{'tokenCount'}->{'input'} = $totalCount; #calcUnknownTokens() reads the cached total from here
- }
- }
- elsif($mode eq 'wer')
- {
-			my ($werType, $sysname, $factorName, $totalWER, $details) = split(/\s+/, $line, 5); #werType is 'sysoutWER' or 'sysoutPWER'
-			if(!$self->cacheIsCurrentForFile($sysname) || !$self->cacheIsCurrentForFile('e')) {next;}
-			$details =~ /^([^;]*);(.*)/;
-			my @sentenceWERs = split(/\s+/, $1);
-			if(!exists $self->{$werType}->{$sysname}) {$self->{$werType}->{$sysname} = {};}
-			$self->{$werType}->{$sysname}->{$factorName} = [$totalWER, \@sentenceWERs, []];
-			my @indexLists = split(/;/, $2);
-			for(my $i = 0; $i < scalar(@sentenceWERs); $i++)
-			{
-				my @indices = grep(/\S/, split(/\s+/, $indexLists[$i])); #find all nonempty tokens
-				push @{$self->{$werType}->{$sysname}->{$factorName}->[2]}, \@indices; #one arrayref of error indices per sentence
- }
- }
- elsif($mode eq 'ppl')
- {
-			my ($sysname, $factorName, $perplexity) = split(/\s+/, $line);
- if(!exists $self->{'perplexity'}->{$sysname}) {$self->{'perplexity'}->{$sysname} = {};}
- $self->{'perplexity'}->{$sysname}->{$factorName} = $perplexity;
- }
- elsif($mode eq 'nawp')
- {
-			my ($sysname, @scores) = split(/\s+/, $line);
- $self->{'nnAdjWERPWER'}->{$sysname} = \@scores;
- }
- }
- close(CACHEFILE);
-}
-
-#arguments: cache type ('bleu' | ...), system name, factor name
-#return: none
-sub flushCache
-{
- my ($self, $cacheType, $sysname, $factorName) = @_;
- if($cacheType eq 'bleu')
- {
- if(defined($self->{'bleuScores'}->{$sysname}) && defined($self->{'bleuScores'}->{$sysname}->{$factorName}))
- {
- delete $self->{'bleuScores'}->{$sysname}->{$factorName};
- }
- }
-}
-
-#arguments: file extension
-#return: whether (0/1) our cache for the given file is at least as recent as the file
-sub cacheIsCurrentForFile
-{
- my ($self, $ext) = @_;
- return 0 if(!exists $self->{'fileCtimes'}->{$ext});
- my @liveStats = stat($self->{'corpusName'} . ".$ext");
- return ($liveStats[9] <= $self->{'fileCtimes'}->{$ext}) ? 1 : 0;
-}
-
-##### utils #####
-#arguments: a, b (scalars)
-sub min
-{
- my ($a, $b) = @_;
- return ($a < $b) ? $a : $b;
-}
-#arguments: a, b (scalars)
-sub max
-{
- my ($a, $b) = @_;
- return ($a > $b) ? $a : $b;
-}
-#arguments: x
-sub my_log
-{
- return -9999999999 unless $_[0];
- return log($_[0]);
-}
-#arguments: x
-sub round
-{
- my $x = shift;
- if($x - int($x) < .5) {return int($x);}
- return int($x) + 1;
-}
-
-#return an approximation of the p-value for a given t FOR A HARDCODED NUMBER OF DEGREES OF FREEDOM
-# (IF YOU CHANGE THIS HARDCODED NUMBER YOU MUST ALSO CHANGE statisticallyTestBLEUResults() and getLowerBoundPValue() )
-#arguments: the t statistic, $t
-#return: a lower bound on the probability mass outside (beyond) +/-$t in the t distribution
-#
-#for a wonderful t-distribution calculator, see <http://math.uc.edu/~brycw/classes/148/tables.htm#t>. UC.edu is Cincinnati.
-sub getLowerBoundPValue
-{
- my $t = abs(shift);
- #encode various known p-values for ###### DOF = 29 ######
- my %t2p = #since we're comparing (hopefully) very similar values, this chart is weighted toward the low end of the t-stat
- (
- 0.0063 => .995,
- 0.0126 => .99,
- 0.0253 => .98,
- 0.0380 => .97,
- 0.0506 => .96,
- 0.0633 => .95,
- 0.0950 => .925,
- 0.127 => .9,
- 0.191 => .85,
- 0.256 => .8,
- 0.389 => .7,
- 0.530 => .6,
- 0.683 => .5,
- 0.854 => .4,
- 1.055 => .3,
- 1.311 => .2,
- 1.699 => .1
- );
-	foreach my $tCmp (sort {$a <=> $b} keys %t2p) {return $t2p{$tCmp} if $t <= $tCmp;} #sort the thresholds numerically, not stringwise
- return 0; #loosest bound ever! groovy, man
-}
-#arguments: the t statistic, $t
-#return: an upper bound on the probability mass outside (beyond) +/-$t in the t distribution
-sub getUpperBoundPValue
-{
- my $t = abs(shift);
- #encode various known p-values for ###### DOF = 29 ######
- my %t2p =
- (
- 4.506 => .0001,
- 4.254 => .0002,
- 3.918 => .0005,
- 3.659 => .001,
- 3.396 => .002,
- 3.038 => .005,
- 2.756 => .01,
- 2.462 => .02,
- 2.045 => .05,
- 1.699 => .1,
- 1.311 => .2,
- 0.683 => .5
- );
-	foreach my $tCmp (sort {$b <=> $a} keys %t2p) {return $t2p{$tCmp} if $t >= $tCmp;} #sort the thresholds numerically in descending order
- return 1; #loosest bound ever!
-}
-
-#arguments: n, r
-#return: binomial coefficient for p = .5 (ie nCr * (1/2)^n)
-sub binCoeff
-{
- my ($n, $r) = @_;
- my $coeff = 1;
- for(my $i = $r + 1; $i <= $n; $i++) {$coeff *= $i; $coeff /= ($i - $r);}
- return $coeff * (.5 ** $n);
-}
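-
-#Example: binCoeff(4, 2) computes 4C2 * 0.5^4 = 6 / 16 = 0.375, i.e. the probability of exactly
-#2 '+' outcomes in 4 fair coin flips, which is what the sign test above sums over.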
-
-#throw if the given factor doesn't have an index defined
-#arguments: factor name
-#return: none
-sub ensureFactorPosDefined
-{
- my ($self, $factorName) = @_;
- if(!defined($self->{'factorIndices'}->{$factorName}))
- {
- throw Error::Simple(-text => "Corpus: no index known for factor '$factorName'\n");
- }
-}
-
-#throw if the filename field corresponding to the argument hasn't been defined
-#arguments: 'truth' | 'input' | a system name
-#return: none
-sub ensureFilenameDefined
-{
- my ($self, $sysname) = @_;
- if($sysname eq 'truth' || $sysname eq 'input')
- {
- if(!defined($self->{"${sysname}Filename"}))
- {
- throw Error::Simple(-text => "Corpus: no $sysname corpus defined\n");
- }
- }
- else
- {
- if(!defined($self->{'sysoutFilenames'}->{$sysname}))
- {
- throw Error::Simple(-text => "Corpus: no system $sysname defined\n");
- }
- }
-}
-
-#throw if there isn't a defined phrase-table filename for the given factor
-#arguments: factor name
-#return: none
-sub ensurePhraseTableDefined
-{
- my ($self, $factorName) = @_;
- if(!defined($self->{'phraseTableFilenames'}->{$factorName}))
- {
- throw Error::Simple(-text => "Corpus: no phrase table defined for factor '$factorName'\n");
- }
-}
-
-#search current directory for files with our corpus name as basename and set filename fields of $self
-#arguments: hashref of filenames to descriptions
-#return: none
-sub locateFiles
-{
- my ($self, $refDescs) = @_;
- open(DIR, "ls -x1 . |") or die "Corpus::locateFiles(): couldn't list current directory\n";
- my $corpusName = $self->{'corpusName'};
- while(my $filename = <DIR>)
- {
- chop $filename; #remove \n
- if($filename =~ /^$corpusName\.(.*)$/)
- {
- my $ext = $1;
- if($ext eq 'e') {$self->{'truthFilename'} = $filename;}
- elsif($ext eq 'f') {$self->{'inputFilename'} = $filename;}
- elsif($ext =~ /pt_(.*)/) {$self->{'phraseTableFilenames'}->{$1} = $filename;}
- else {$self->{'sysoutFilenames'}->{$ext} = $filename;}
- if(defined($refDescs->{$filename}))
- {
- $self->{'fileDescriptions'}->{$filename} = $refDescs->{$filename};
- }
- }
- }
- close(DIR);
-}
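-
-#Naming convention illustrated with a hypothetical corpus 'mycorpus': mycorpus.e is the truth,
-#mycorpus.f the input, mycorpus.pt_surf the phrase table for factor 'surf', and any other
-#extension (e.g. mycorpus.pharaoh) is treated as the output of a system named after the extension.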
-
-#arguments: type ('truth' | 'input' | a string to represent a system output), filename
-#pre: filename exists
-#return: none
-sub loadSentences
-{
- my ($self, $sysname, $filename) = @_;
- #if the sentences are already loaded, leave them be
- if(exists $self->{$sysname} && scalar(@{$self->{$sysname}}) > 0) {return;}
-
- $self->{$sysname} = [];
- $self->{'tokenCount'}->{$sysname} = 0;
-	open(INFILE, "<$filename") or die "Corpus::loadSentences(): couldn't open '$filename' for read\n";
- while(my $line = <INFILE>)
- {
- my @words = split(/\s+/, $line);
- $self->{'tokenCount'}->{$sysname} += scalar(@words);
- my $refFactors = [];
- foreach my $word (@words)
- {
- my @factors = split(/\|/, $word);
- push @$refFactors, \@factors;
- }
- push @{$self->{$sysname}}, $refFactors;
- }
- close(INFILE);
-}
-
-#free the memory used for the given corpus (but NOT any associated calculations, eg WER)
-#NB: currently a no-op -- the body is commented out, so loaded sentences simply stay in memory
-#arguments: type ('truth' | 'input' | a string to represent a system output)
-#return: none
-sub releaseSentences
-{
-# my ($self, $sysname) = @_;
-# $self->{$sysname} = [];
-}
-
-#arguments: factor name
-#return: none
-#throw if we don't have a filename for the given phrase table
-sub loadPhraseTable
-{
- my ($self, $factorName) = @_;
- $self->ensurePhraseTableDefined($factorName);
-
- my $filename = $self->{'phraseTableFilenames'}->{$factorName};
- open(PTABLE, "<$filename") or die "couldn't open '$filename' for read\n";
- $self->{'phraseTables'}->{$factorName} = {}; #create ref to phrase table (hash of strings, for source phrases, to anything whatsoever)
- #assume the table is sorted so that duplicate source phrases will be consecutive
- while(my $line = <PTABLE>)
- {
- my @phrases = split(/\s*\|\|\|\s*/, $line, 2);
- $self->{'phraseTables'}->{$factorName}->{$phrases[0]} = 0; #just so that it's set to something
- }
- close(PTABLE);
-}
-
-#arguments: factor name
-#return: none
-sub releasePhraseTable
-{
- my ($self, $factorName) = @_;
- $self->{'phraseTables'}->{$factorName} = {};
-}
-
-#arguments: name of list ('nounAndAdj' | ...)
-#return: arrayref of strings (postags)
-sub getPOSTagList
-{
- my ($self, $listname) = @_;
- ##### assume PTB tagset #####
- if($listname eq 'nounAndAdj') {return ['NN', 'NNS', 'NNP', 'NNPS', 'JJ', 'JJR', 'JJS'];}
-# if($listname eq '') {return [];}
-}
-
-#arguments: list to be filtered (arrayref of arrayrefs of factor strings), desired factor index, arrayref of allowable values
-#return: filtered list as array of arrayrefs of factor strings
-sub filterFactors
-{
- my ($self, $refFullList, $index, $refFactorValues) = @_;
- my $valuesRegex = join("|", @$refFactorValues);
- my @filteredList = ();
- foreach my $factors (@$refFullList)
- {
-		if($factors->[$index] =~ m/^(?:$valuesRegex)$/) #anchor so a tag must match exactly rather than as a substring
- {
- push @filteredList, $factors;
- }
- }
- return @filteredList;
-}
-
-#arguments: system output (arrayref of arrayrefs of arrayrefs of factor strings), truth (same), factor index to use
-#return: wer score, arrayref of sentence scores, arrayref of arrayrefs of indices of errorful words
-sub corpusWER
-{
- my ($self, $refSysOutput, $refTruth, $index) = @_;
- my ($totWER, $sentenceWER, $errIndices) = (0, [], []);
- for(my $i = 0; $i < scalar(@$refSysOutput); $i++)
- {
- my ($sentWER, $indices) = $self->sentenceWER($refSysOutput->[$i], $refTruth->[$i], $index);
- $totWER += $sentWER;
- push @$sentenceWER, $sentWER;
- push @$errIndices, $indices;
- }
- return ($totWER, $sentenceWER, $errIndices);
-}
-
-#arguments: system output (arrayref of arrayrefs of factor strings), truth (same), factor index to use
-#return: wer score, arrayref of indices of errorful words
-sub sentenceWER
-{
- #constants: direction we came through the table
- my ($DIR_NONE, $DIR_SKIPTRUTH, $DIR_SKIPOUT, $DIR_SKIPBOTH) = (-1, 0, 1, 2); #values don't matter but must be unique
- my ($self, $refSysOutput, $refTruth, $index) = @_;
- my ($totWER, $indices) = (0, []);
- my ($sLength, $eLength) = (scalar(@$refSysOutput), scalar(@$refTruth));
- if($sLength == 0 || $eLength == 0) {return ($totWER, $indices);} #special case
-
- my @refWordsMatchIndices = (-1) x $eLength; #at what sysout-word index this truth word is first matched
- my @sysoutWordsMatchIndices = (-1) x $sLength; #at what truth-word index this sysout word is first matched
- my $table = []; #index by sysout word index, then truth word index; a cell holds max count of matching words and direction we came to get it
- #dynamic-programming time: find the path through the table with the maximum number of matching words
- for(my $i = 0; $i < $sLength; $i++)
- {
- push @$table, [];
- for(my $j = 0; $j < $eLength; $j++)
- {
- my ($maxPrev, $prevDir) = (0, $DIR_NONE);
- if($i > 0 && $table->[$i - 1]->[$j]->[0] >= $maxPrev) {$maxPrev = $table->[$i - 1]->[$j]->[0]; $prevDir = $DIR_SKIPOUT;}
- if($j > 0 && $table->[$i]->[$j - 1]->[0] >= $maxPrev) {$maxPrev = $table->[$i]->[$j - 1]->[0]; $prevDir = $DIR_SKIPTRUTH;}
- if($i > 0 && $j > 0 && $table->[$i - 1]->[$j - 1]->[0] >= $maxPrev) {$maxPrev = $table->[$i - 1]->[$j - 1]->[0]; $prevDir = $DIR_SKIPBOTH;}
- my $match = ($refSysOutput->[$i]->[$index] eq $refTruth->[$j]->[$index] && $refWordsMatchIndices[$j] == -1 && $sysoutWordsMatchIndices[$i] == -1) ? 1 : 0;
- if($match == 1) {$refWordsMatchIndices[$j] = $i; $sysoutWordsMatchIndices[$i] = $j;}
- push @{$table->[$i]}, [($match ? $maxPrev + 1 : $maxPrev), $prevDir];
- }
- }
-
- #look back along the path and get indices of non-matching words
- my @unusedSysout = (0) x $sLength; #whether each sysout word was matched--used for outputting html table
- my ($i, $j) = ($sLength - 1, $eLength - 1);
- while($i > 0) #work our way back to the first sysout word
- {
- push @{$table->[$i]->[$j]}, 0; #length is flag to highlight cell
- if($table->[$i]->[$j]->[1] == $DIR_SKIPTRUTH)
- {
- $j--;
- }
- elsif($table->[$i]->[$j]->[1] == $DIR_SKIPOUT)
- {
- if($table->[$i - 1]->[$j]->[0] == $table->[$i]->[$j]->[0]) {unshift @$indices, $i; $unusedSysout[$i] = 1;}
- $i--;
- }
- elsif($table->[$i]->[$j]->[1] == $DIR_SKIPBOTH)
- {
- if($table->[$i - 1]->[$j - 1]->[0] == $table->[$i]->[$j]->[0]) {unshift @$indices, $i; $unusedSysout[$i] = 1;}
- $i--; $j--;
- }
- }
- #we're at the first sysout word; finish up checking for matches
- while($j > 0 && $refWordsMatchIndices[$j] != 0) {push @{$table->[0]->[$j]}, 0; $j--;}
- if($j == 0 && $refWordsMatchIndices[0] != 0) {unshift @$indices, 0; $unusedSysout[0] = 1;} #no truth word was matched to the first sysout word
-
- #print some HTML to debug the WER algorithm
-# print "<table border=1><tr><td></td><td>" . join("</td><td>", map {() . $_->[$index]} @$refTruth) . "</td></tr>";
-# for(my $i = 0; $i < $sLength; $i++)
-# {
-# print "<tr><td" . (($unusedSysout[$i] == 1) ? " style=\"background-color: #ffdd88\">" : ">") . $refSysOutput->[$i]->[$index] . "</td>";
-# for(my $j = 0; $j < $eLength; $j++)
-# {
-# print "<td";
-# if(scalar(@{$table->[$i]->[$j]}) > 2) {print " style=\"color: yellow; background-color: #000080\"";}
-# my $arrow;
-# if($table->[$i]->[$j]->[1] == $DIR_NONE) {$arrow = "&times;";}
-# elsif($table->[$i]->[$j]->[1] == $DIR_SKIPTRUTH) {$arrow = "&larr;";}
-# elsif($table->[$i]->[$j]->[1] == $DIR_SKIPOUT) {$arrow = "&uarr;";}
-# elsif($table->[$i]->[$j]->[1] == $DIR_SKIPBOTH) {$arrow = "&loz;";}
-# print ">" . $table->[$i]->[$j]->[0] . " " . $arrow . "</td>";
-# }
-# print "</tr>";
-# }
-# print "</table>";
-
- my $matchCount = 0;
- if($sLength > 0) {$matchCount = $table->[$sLength - 1]->[$eLength - 1]->[0];}
- return ($sLength - $matchCount, $indices);
-}
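-
-#Illustrative example (hypothetical words): sysout [a, x, c] vs. truth [a, b, c]: the table above
-#finds at most 2 in-order matches (a and c), so the sentence WER is sLength - matches = 3 - 2 = 1.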
-
-#arguments: system output (arrayref of arrayrefs of arrayrefs of factor strings), truth (same), factor index to use
-#return: wer score, arrayref of sentence scores, arrayref of arrayrefs of indices of errorful words
-sub corpusPWER
-{
- my ($self, $refSysOutput, $refTruth, $index) = @_;
- my ($totWER, $sentenceWER, $errIndices) = (0, [], []);
- for(my $i = 0; $i < scalar(@$refSysOutput); $i++)
- {
- my ($sentWER, $indices) = $self->sentencePWER($refSysOutput->[$i], $refTruth->[$i], $index);
- $totWER += $sentWER;
- push @$sentenceWER, $sentWER;
- push @$errIndices, $indices;
- }
- return ($totWER, $sentenceWER, $errIndices);
-}
-
-#arguments: system output (arrayref of arrayrefs of factor strings), truth (same), factor index to use
-#return: wer score, arrayref of indices of errorful words
-sub sentencePWER
-{
- my ($self, $refSysOutput, $refTruth, $index) = @_;
- my ($totWER, $indices) = (0, []);
- my ($sLength, $eLength) = (scalar(@$refSysOutput), scalar(@$refTruth));
- my @truthWordUsed = (0) x $eLength; #array of 0/1; can only match a given truth word once
- for(my $j = 0; $j < $sLength; $j++)
- {
- my $found = 0;
- for(my $k = 0; $k < $eLength; $k++) #check output word against entire truth sentence
- {
- if(lc $refSysOutput->[$j]->[$index] eq lc $refTruth->[$k]->[$index] && $truthWordUsed[$k] == 0)
- {
- $truthWordUsed[$k] = 1;
- $found = 1;
- last;
- }
- }
- if($found == 0)
- {
- $totWER++;
- push @$indices, $j;
- }
- }
- return ($totWER, $indices);
-}
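-
-#Illustrative example (hypothetical words): sysout [a, b, c] vs. truth [c, a, b] gives PWER 0,
-#since every sysout word can be matched to an unused truth word regardless of position; a sysout
-#word with no remaining truth match adds 1.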
-
-#BLEU calculation for a single sentence
-#arguments: truth sentence (arrayref of arrayrefs of factor strings), sysout sentence (same), factor index to use
-#return: 1- through 4-gram matching and total counts (1-g match, 1-g tot, 2-g match...), candidate length, reference length
-sub sentenceBLEU
-{
- my ($self, $refTruth, $refSysOutput, $factorIndex, $debug) = @_;
- my ($length_reference, $length_translation) = (scalar(@$refTruth), scalar(@$refSysOutput));
- my ($correct1, $correct2, $correct3, $correct4, $total1, $total2, $total3, $total4) = (0, 0, 0, 0, 0, 0, 0, 0);
- my %REF_GRAM = ();
- my ($i, $gram);
- for($i = 0; $i < $length_reference; $i++)
- {
- $gram = $refTruth->[$i]->[$factorIndex];
- $REF_GRAM{$gram}++;
- next if $i<1;
- $gram = $refTruth->[$i - 1]->[$factorIndex] ." ".$gram;
- $REF_GRAM{$gram}++;
- next if $i<2;
- $gram = $refTruth->[$i - 2]->[$factorIndex] ." ".$gram;
- $REF_GRAM{$gram}++;
- next if $i<3;
- $gram = $refTruth->[$i - 3]->[$factorIndex] ." ".$gram;
- $REF_GRAM{$gram}++;
- }
- for($i = 0; $i < $length_translation; $i++)
- {
- $gram = $refSysOutput->[$i]->[$factorIndex];
- if (defined($REF_GRAM{$gram}) && $REF_GRAM{$gram} > 0) {
- $REF_GRAM{$gram}--;
- $correct1++;
- }
- next if $i<1;
- $gram = $refSysOutput->[$i - 1]->[$factorIndex] ." ".$gram;
- if (defined($REF_GRAM{$gram}) && $REF_GRAM{$gram} > 0) {
- $REF_GRAM{$gram}--;
- $correct2++;
- }
- next if $i<2;
- $gram = $refSysOutput->[$i - 2]->[$factorIndex] ." ".$gram;
- if (defined($REF_GRAM{$gram}) && $REF_GRAM{$gram} > 0) {
- $REF_GRAM{$gram}--;
- $correct3++;
- }
- next if $i<3;
- $gram = $refSysOutput->[$i - 3]->[$factorIndex] ." ".$gram;
- if (defined($REF_GRAM{$gram}) && $REF_GRAM{$gram} > 0) {
- $REF_GRAM{$gram}--;
- $correct4++;
- }
- }
- my $total = $length_translation;
- $total1 = max(1, $total);
- $total2 = max(1, $total - 1);
- $total3 = max(1, $total - 2);
- $total4 = max(1, $total - 3);
-
- return ($correct1, $total1, $correct2, $total2, $correct3, $total3, $correct4, $total4, $length_translation, $length_reference);
-}
-
-##### filesystem #####
-
-#open as many given files as possible; only warn about the rest
-#arguments: list of filename extensions to open (assume corpus name is file title)
-#return: hash from type string to filehandleref, giving all files that were successfully opened
-sub openFiles
-{
-	my ($self, @extensions) = @_;
-	my %openedFiles = ();
-	foreach my $ext (@extensions)
-	{
-		my $filename = $self->{'corpusName'} . ".$ext"; #files are named CORPUSNAME.EXT, as in locateFiles()
-		if(open(my $fh, "<", $filename)) #lexical handle: a shared bareword handle would be clobbered by each open
-		{
-			$openedFiles{$ext} = $fh;
-		}
-		else
-		{
-			warn "Corpus::openFiles(): couldn't open '$filename' for read\n";
-		}
-	}
-	return %openedFiles;
-}
-
-#read one line from each given file
-#arguments: hash from type string to filehandleref
-#return: hash from type string to sentence (stored as arrayref of arrayrefs of factors) read from corresponding file;
-#        returns an empty list once every file has reached EOF, so callers' while loops terminate
-sub readLineFromFiles
-{
-	my ($self, %openedFiles) = @_;
-	my (%lines, $gotLine);
-	foreach my $type (keys %openedFiles)
-	{
-		$lines{$type} = [];
-		my $sentence = readline($openedFiles{$type}); #<$openedFiles{$type}> would be parsed as a glob, not a read
-		next unless defined $sentence;
-		$gotLine = 1;
-		my @words = split(/\s+/, $sentence);
-		foreach my $word (@words)
-		{
-			my @factors = split(/\|/, $word);
-			push @{$lines{$type}}, \@factors;
-		}
-	}
-	return () unless $gotLine;
-	return %lines;
-}
-
-#close all given files
-#arguments: hash from type string to filehandleref
-#return: none
-sub closeFiles
-{
- my ($self, %openedFiles) = @_;
- foreach my $type (keys %openedFiles)
- {
- close($openedFiles{$type});
- }
-}
-
-##### write HTML #####
-
-#print HTML for comparing various versions of a sentence, with special processing for each version as appropriate
-#arguments: filehandleref to which to write, sentence ID string, hashref of version string to sentence (stored as arrayref of arrayref of factor strings)
-#return: none
-sub printSingleSentenceComparison
-{
- my ($self, $fh, $sentID, $sentences) = @_;
- my $curFH = select;
- select $fh;
- #javascript to reorder rows to look nice afterward
- print "<script type=\"text/javascript\">
- function reorder_$sentID()
- {/*
- var table = document.getElementById('div_$sentID').firstChild;
- var refTransRow = table.getElementById('row_e');
- var inputRow = table.getElementById('row_f');
- table.removeRow(refTransRow);
- table.removeRow(inputRow);
- var newRow1 = table.insertRow(0);
- var newRow2 = table.insertRow(1);
- newRow1.childNodes = inputRow.childNodes;
- newRow2.childNodes = refTransRow.childNodes;*/
- }
- </script>";
- #html for sentences
- print "<div id=\"div_$sentID\" style=\"padding: 3px; margin: 5px\">";
- print "<table border=\"1\">";
-# my $rowCount = 0;
-# my @bgColors = ("#ffefbf", "#ffdf7f");
- #process all rows in order
- foreach my $sentType (keys %$sentences)
- {
-#		my $bgcolor = $bgColors[$rowCount % 2]; #disabled along with the alternating-background code commented out above
- print "<tr id=\"row_$sentType\"><td align=right>";
- #description of sentence
-		if(defined($self->{'fileDescriptions'}->{$self->{'corpusName'} . ".$sentType"})) #descriptions are keyed by full filename, CORPUSNAME.EXT
-		{
-			print "(" . $self->{'fileDescriptions'}->{$self->{'corpusName'} . ".$sentType"} . ")";
- }
- else
- {
- print "($sentType)";
- }
- print "</td><td align=left>";
- #sentence with markup
- if($sentType eq 'f') #input
- {
-# $self->writeHTMLSentenceWithFactors($fh, $sentences->{$sentType}, $inputColor);
- }
- elsif($sentType eq 'e') #reference translation
- {
-# $self->writeHTMLSentenceWithFactors($fh, $sentences->{$sentType}, $reftransColor);
- }
- else #system output
- {
-# $self->writeHTMLTranslationHighlightedWithFactors($fh, $sentences->{$sentType}, $sentences->{'e'}, $highlightColors);
- }
- print "</td></tr>";
-# $rowCount++;
- }
- print "</table>";
- print "</div>\n";
- select $curFH;
-}
diff --git a/scripts/analysis/smtgui/README b/scripts/analysis/smtgui/README
deleted file mode 100644
index e6bcabb2e..000000000
--- a/scripts/analysis/smtgui/README
+++ /dev/null
@@ -1,42 +0,0 @@
-Readme for SMTGUI
-Philipp Koehn, Evan Herbst
-7 / 31 / 06
------------------------------------
-
-SMTGUI is Philipp's and my code to analyze a decoder's output (the decoder doesn't have to be moses, but most of SMTGUI's features relate to factors, so it probably will be). You can view a list of available corpora by running <newsmtgui.cgi?ACTION=> on any web server. When you're viewing a corpus, click the checkboxes and Compare to see sentences from various sources on one screen. Currently they're in an annoying format; feel free to make the display nicer and more useful. There are per-sentence stats stored in a Corpus object; they just aren't used yet. See compare2() in newsmtgui and Corpus::printSingleSentenceComparison() for a start to better display code. For now it's mostly the view-corpus screen that's useful.
-
-newsmtgui.cgi is the main program. Corpus.pm is my module; Error.pm is a standard part of Perl but appears to not always be distributed. The accompanying version is Error.pm v1.15.
-
-The program requires the file 'file-factors', which gives the list of factors included in each corpus (see the example file for details). Only corpora included in 'file-factors' are displayed. The file 'file-descriptions' is optional and associates a descriptive string with each included filename. These are used only for display. Again an example is provided.
-
-For the corpus with name CORPUS, there should be present the files:
-- CORPUS.f, the foreign input
-- CORPUS.e, the truth (aka reference translation)
-- CORPUS.SYSTEM_TRANSLATION for each system to be analyzed
-- CORPUS.pt_FACTORNAME for each factor that requires a phrase table (these are currently used only to count unknown source words)
-
-The .f, .e and system-output files should have the usual pipe-delimited format, one sentence per line. Phrase tables should also have standard three-pipe format.
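-
-For illustration only (the tokens, tags and scores below are made up), one line of a factored corpus file and one line of a phrase table might look like:
-
-  das|ART|der haus|NN|haus ist|VAFIN|sein klein|ADJD|klein
-  das haus ||| the house ||| 0.8 0.7 0.9 0.6 2.718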
-
-A list of standard factor names is available in @Corpus::FACTORNAMES. Feel free to add, but woe betide you if you muck with 'surf', 'pos' and 'lemma'; those are hardcoded all over the place.
-
-Currently the program assumes you've included factors 'surf', 'pos' and 'lemma', in whatever order; if not you'll want to edit view_corpus() in newsmtgui.cgi to not automatically display all info. To get English POS tags and lemmas from a words-only corpus and put together factors into one file:
-
-$ $BIN/tag-english < CORPUS.lc > CORPUS.pos-tmp (call Brill)
-$ $BIN/morph < CORPUS.pos-tmp > CORPUS.morph
-$ $DATA/test/factor-stem.en.perl < CORPUS.morph > CORPUS.lemma
-$ cat CORPUS.pos-tmp | perl -n -e 's/_/\|/g; print;' > CORPUS.lc+pos (replace _ with |)
-$ $DATA/test/combine-features.perl CORPUS lc+pos lemma > CORPUS.lc+pos+lemma
-$ rm CORPUS.pos-tmp (cleanup)
-
-where $BIN=/export/ws06osmt/bin, $DATA=/export/ws06osmt/data.
-
-To get German POS tags and lemmas from a words-only corpus (the first step must be run on linux):
-
-$ $BIN/recase.perl --in CORPUS.lc --model $MODELS/en-de/recaser/pharaoh.ini > CORPUS.recased (call pharaoh with a lowercase->uppercase model)
-$ $BIN/run-lopar-tagger-lowercase.perl CORPUS.recased CORPUS.recased.lopar (call LOPAR)
-$ $DATA/test/factor-stem.de.perl < CORPUS.recased.lopar > CORPUS.stem
-$ $BIN/lowercase.latin1.perl < CORPUS.stem > CORPUS.lcstem (as you might guess, assumes latin-1 encoding)
-$ $DATA/test/factor-pos.de.perl < CORPUS.recased.lopar > CORPUS.pos
-$ $DATA/test/combine-features.perl CORPUS lc pos lcstem > CORPUS.lc+pos+lcstem
-
-where $MODELS=/export/ws06osmt/models.
diff --git a/scripts/analysis/smtgui/file-descriptions b/scripts/analysis/smtgui/file-descriptions
deleted file mode 100644
index 5c1f9153f..000000000
--- a/scripts/analysis/smtgui/file-descriptions
+++ /dev/null
@@ -1,4 +0,0 @@
-devtest2006.de-en.matrix05-baseline.pharaoh Pharaoh JHUWS baseline run
-devtest2006.de-en.matrix05-baseline.moses-2006-07-20 Moses baseline run
-devtest2006.en-de.matrix05-baseline.pharaoh Pharaoh JHUWS baseline run
-devtest2006.en-de.matrix05-moses.2006-08-02 Moses baseline run
diff --git a/scripts/analysis/smtgui/file-factors b/scripts/analysis/smtgui/file-factors
deleted file mode 100644
index 7938e9297..000000000
--- a/scripts/analysis/smtgui/file-factors
+++ /dev/null
@@ -1,8 +0,0 @@
-#corpus name : list of factors in corpus : [input] factor LMfilename, factor LMfilename, ... : [output] factor LMfilename, factor LMfilename, ...
-#(the given factors should be present in all files for the given corpus)
-devtest2006.de-en : surf pos lemma : surf europarl.de.srilm.gz : surf europarl.en.srilm.gz
-devtest2006.en-de : surf pos lemma : surf europarl.en.srilm.gz : surf europarl.de.srilm.gz
-#pstem: lemmas come from the Porter stemmer (and so are really a mix of stems and lemmas)
-pstem_devtest2006.de-en : surf pos lemma : : surf europarl.en.srilm.gz
-#replace eszett (ß) with ss in German text
-ss_devtest2006.en-de : surf pos lemma : surf europarl.en.srilm.gz : surf ss_europarl.de.srilm.gz
diff --git a/scripts/analysis/smtgui/filter-phrase-table.pl b/scripts/analysis/smtgui/filter-phrase-table.pl
deleted file mode 100644
index a7e998794..000000000
--- a/scripts/analysis/smtgui/filter-phrase-table.pl
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/perl -w
-
-#by Philipp Koehn, de-augmented by Evan Herbst
-#filter a phrase table for a specific input corpus
-#arguments: phrasetable_filename input_filename factor_index (0...)
-#outputs to phrasetable_filename.short
-
-#similar function to filter-model-given-input.pl, but only operates
-#on the phrase table and doesn't require that any subdirectories exist
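-#
-#example invocation (hypothetical filenames):
-#  ./filter-phrase-table.pl phrase-table.gz devtest2006.de-en.f 0
-#this keeps only entries whose source phrase (factor 0) occurs in devtest2006.de-en.f and writes them to phrase-table.gz.short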
-
-use strict;
-
-my $MAX_LENGTH = 10;
-
-my ($file, $input, $source_factor) = @ARGV;
-my $dir = ".";
-
- # get tables to be filtered (and modify config file)
- my (@TABLE,@TABLE_FACTORS,@TABLE_NEW_NAME,%CONSIDER_FACTORS);
- push @TABLE,$file;
-
- my $new_name = "$file.short";#"$dir/phrase-table.$source_factor";
- push @TABLE_NEW_NAME,$new_name;
-
- $CONSIDER_FACTORS{$source_factor} = 1;
- push @TABLE_FACTORS,$source_factor;
-
- # get the phrase pairs appearing in the input text
- my %PHRASE_USED;
- die("could not find input file $input") unless -e $input;
- open(INPUT,$input);
- while(my $line = <INPUT>) {
- chop($line);
- my @WORD = split(/ +/,$line);
- for(my $i=0;$i<=$#WORD;$i++) {
- for(my $j=0;$j<$MAX_LENGTH && $j+$i<=$#WORD;$j++) {
- foreach (keys %CONSIDER_FACTORS) {
- my @FACTOR = split(/,/);
- my $phrase = "";
- for(my $k=$i;$k<=$i+$j;$k++) {
- my @WORD_FACTOR = split(/\|/,$WORD[$k]);
- for(my $f=0;$f<=$#FACTOR;$f++) {
- $phrase .= $WORD_FACTOR[$FACTOR[$f]]."|";
- }
- chop($phrase);
- $phrase .= " ";
- }
- chop($phrase);
- $PHRASE_USED{$_}{$phrase}++;
- }
- }
- }
- }
- close(INPUT);
-
- # filter files
- for(my $i=0;$i<=$#TABLE;$i++) {
- my ($used,$total) = (0,0);
- my $file = $TABLE[$i];
- my $factors = $TABLE_FACTORS[$i];
- my $new_file = $TABLE_NEW_NAME[$i];
- print STDERR "filtering $file -> $new_file...\n";
-
- if (-e $file && $file =~ /\.gz$/) { open(FILE,"zcat $file |"); }
- elsif (! -e $file && -e "$file.gz") { open(FILE,"zcat $file.gz|"); }
- elsif (-e $file) { open(FILE,$file); }
- else { die("could not find model file $file"); }
-
- open(FILE_OUT,">$new_file");
-
- while(my $entry = <FILE>) {
- my ($foreign,$rest) = split(/ \|\|\| /,$entry,2);
- $foreign =~ s/ $//;
- if (defined($PHRASE_USED{$factors}{$foreign})) {
- print FILE_OUT $entry;
- $used++;
- }
- $total++;
- }
- close(FILE);
- close(FILE_OUT);
- printf STDERR "$used of $total phrase pairs used (%.2f%s) - note: max length $MAX_LENGTH\n",(100*$used/$total),'%';
- }
diff --git a/scripts/analysis/smtgui/newsmtgui.cgi b/scripts/analysis/smtgui/newsmtgui.cgi
deleted file mode 100755
index a31ac558e..000000000
--- a/scripts/analysis/smtgui/newsmtgui.cgi
+++ /dev/null
@@ -1,996 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-use CGI;
-use Corpus; #Evan's code
-use Error qw(:try);
-
-#files with extensions other than these are interpreted as system translations; see the file 'file-descriptions', if it exists, for the comments that go with them
-my %FILETYPE = ('e' => 'Reference Translation',
- 'f' => 'Foreign Original',
- 'ref.sgm' => 'Reference Translations',
- 'e.sgm' => 'Reference Translations',
- 'src.sgm' => 'Foreign Originals',
- 'f.sgm' => 'Foreign Originals');
-my %DONTSCORE = ('f' => 1, 'f.sgm' => 1, 'src.sgm' => 1,
- 'e' => 1, 'e.sgm' => 1, 'ref.sgm' => 1);
-my @SHOW = ('f', 'e', 'comm');
-my %SHOW_COLOR = ('f' => "BLUE",
- 'e' => "GREEN");
-my $FOREIGN = 'f';
-
-#FILEDESC: textual descriptions associated with specific filenames; to be displayed on the single-corpus view
-my %FILEDESC = (); &load_descriptions();
-my %factorData = loadFactorData('file-factors');
-my %MEMORY; &load_memory();
-my (@mBLEU,@NIST);
-@mBLEU=`cat mbleu-memory.dat` if -e "mbleu-memory.dat"; chop(@mBLEU);
-@NIST = `cat nist-memory.dat` if -e "nist-memory.dat"; chop(@NIST);
-my %in; &ReadParse(); #parse arguments
-
-if (scalar(@ARGV) > 0 && $ARGV[0] eq 'bleu') {
- $in{CORPUS} = $ARGV[1];
- $in{ACTION} = "VIEW_CORPUS";
-}
-
-my %MULTI_REF;
-if ($in{CORPUS} && -e "$in{CORPUS}.ref.sgm") {
- my $sysid;
- open(REF,"$in{CORPUS}.ref.sgm");
- while(<REF>) {
- $sysid = $1 if /<DOC.+sysid=\"([^\"]+)\"/;
- if (/<seg[^>]*> *(\S.+\S) *<\/seg>/) {
- push @{$MULTI_REF{$sysid}}, $1;
- }
- }
- close(REF);
-}
-
-if ($in{ACTION} eq '') { &show_corpora(); }
-elsif ($in{ACTION} eq 'VIEW_CORPUS') { &view_corpus(); }
-elsif ($in{ACTION} eq 'SCORE_FILE') { &score_file(); }
-elsif ($in{ACTION} eq 'RESCORE_FILE') { &score_file(); }
-elsif ($in{ACTION} eq 'COMPARE') { &compare(); }
-else { &htmlhead("Unknown Action $in{ACTION}"); }
-print "</BODY></HTML>\n";
-
-###### SHOW CORPORA IN EVALUATION DIRECTORY
-
-sub show_corpora {
- my %CORPUS = ();
-
- # find corpora in evaluation directory: see the factor-index file, which was already read in
- foreach my $corpusName (keys %factorData)
- {
- $CORPUS{$corpusName} = 1;
- }
-
- # list corpora
- &htmlhead("All Corpora");
- print "<UL>\n";
- foreach (sort (keys %CORPUS)) {
- print "<LI><A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($_)."\">Corpus $_</A>\n";
- }
- print "</UL>\n";
-}
-
-###### SHOW INFORMATION FOR ONE CORPUS
-
-sub view_corpus {
- my @TABLE;
- &htmlhead("View Corpus $in{CORPUS}");
-
-  # construct a Corpus object for the requested corpus
- my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
-
- my ($sentence_count, $lineInfo);
- if(-e "$in{CORPUS}.f")
- {
- $lineInfo = `wc -l $in{CORPUS}.f`;
- $lineInfo =~ /^\s*(\d+)\s+/;
- $sentence_count = 0 + $1;
- }
- else
- {
- $lineInfo = `wc -l $in{CORPUS}.e`;
- $lineInfo =~ /^\s*(\d+)\s+/;
- $sentence_count = 0 + $1;
- }
-
- print "Corpus '$in{CORPUS}' consists of $sentence_count sentences\n";
- print "(<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&mBLEU=1>with mBLEU</A>)" if ((!defined($in{mBLEU})) && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f");
- print "<P>\n";
- print "<FORM ACTION=''>\n";
- print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=COMPARE>\n";
- print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
- print "<TABLE BORDER=1 CELLSPACING=0><TR>
-<TD>File (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS}).">sort</A>)</TD>
-<TD>Date (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=TIME>sort</A>)</TD>";
- if (-e "$in{CORPUS}.e") {
- print "<TD>IBM BLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=IBM>sort</A>)</TD>";
- }
- if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm") {
- print "<TD>NIST (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=NIST>sort</A>)</TD>";
- if (! -e "$in{CORPUS}.e") {
- print "<TD>BLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=BLEU>sort</A>)</TD>";
- }
- }
- if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e" && -e "$in{CORPUS}.f") {
- print "<TD>mBLEU (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=mBLEU>sort</A>)</TD>";
- }
- print "<TD>Unknown Words</TD>"; #can't sort on; only applies to the input
- print "<TD>Perplexity</TD>"; #applies to truth and system outputs
- print "<TD>WER (<A HREF=?ACTION=VIEW_CORPUS&CORPUS=" . CGI::escape($in{CORPUS})."&SORT=WER>sort</A>)</TD>";
- print "<TD>Noun & adj WER-PWER</TD>"; #can't sort on; only applies to sysoutputs
- print "<TD>Surface vs. lemma PWER</TD>"; #can't sort on; only applies to sysoutputs
- print "<TD>Statistical Measures</TD>";
-
- open(DIR,"ls $in{CORPUS}.*|");
- while(<DIR>) {
- my $sort = "";
- chop;
- my $sgm = 0;
- if (/.sgm$/)
- {
- `grep '<seg' $_ | wc -l` =~ /^\s*(\d+)\s+/;
- next unless $1 == $sentence_count;
- $sgm = 1;
- }
- else
- {
- `wc -l $_` =~ /^\s*(\d+)\s+/;
- next unless $1 == $sentence_count;
- }
- /^$in{CORPUS}.([^\/]+)$/;
- my $file = $1;
- # checkbox for compare
- my $row = "<TR><TD style=\"font-size: small\"><INPUT TYPE=CHECKBOX NAME=FILE_$file VALUE=1>";
- # README
- if (-e "$in{CORPUS}.$file.README") {
- my $readme = `cat $in{CORPUS}.$file.README`;
- $readme =~ s/([\"\'])/\\\"/g;
- $readme =~ s/[\n\r]/\\n/g;
- $readme =~ s/\t/\\t/g;
- $row .= "<A HREF='javascript:FieldInfo(\"$in{CORPUS}.$file\",\"$readme\")'>";
- }
- # filename
- $row .= "$file</A>";
- # description (hard-coded)
- my @TRANSLATION_SENTENCE = `cat $in{CORPUS}.$file`;
- chop(@TRANSLATION_SENTENCE);
-
- #count sentences that contain null words
- my $null_count = 0;
- foreach (@TRANSLATION_SENTENCE)
- {
- $null_count++ if /^NULL$/ || /^NONE$/;
- }
- if ($null_count > 0) {
- $row .= "$null_count NULL ";
- }
-
- $row .= " (".$FILETYPE{$file}.")" if defined($FILETYPE{$file});
- $row .= " (".$FILEDESC{$in{CORPUS}.".".$file}.")" if defined($FILEDESC{$in{CORPUS}.".".$file});
- $row .= " (".$FILEDESC{$file}.")" if defined($FILEDESC{$file});
- # filedate
- my @STAT = stat("$in{CORPUS}.$file");
-    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($STAT[9]); #STAT[9] is the last modify time
- my $time = sprintf("%04d-%02d-%02d %02d:%02d:%02d",$year+1900,$mon+1,$mday,$hour,$min,$sec);
- $row .= "</TD>\n<TD>".$time."</TD>\n";
- if (defined($in{SORT}) && $in{SORT} eq 'TIME') { $sort = $time; }
- # IBM BLEU score
- my $no_bleu =0;
- if (!$sgm && -e "$in{CORPUS}.e") {
- $row .= "<TD>";
- if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e" && $file !~ /^pt/) {
- my ($score,$p1,$p2,$p3,$p4,$bp) = $corpus->calcBLEU($file, 'surf');
- $row .= sprintf("<B>%.04f</B> %.01f/%.01f/%.01f/%.01f *%.03f", $score, $p1, $p2, $p3, $p4, $bp);
- if (defined($in{SORT}) && $in{SORT} eq 'IBM') { $sort = $score; }
- }
- $row .= "</TD>\n";
- }
- else {
- $no_bleu=1;
- }
- # NIST score
- if (-e "$in{CORPUS}.ref.sgm" && -e "$in{CORPUS}.src.sgm"
- && !$DONTSCORE{$file}) {
- $row .= "<TD>";
- print "$DONTSCORE{$file}+";
- my ($nist,$nist_bleu);
- if ($file =~ /sgm$/) {
- ($nist,$nist_bleu) = &get_nist_score("$in{CORPUS}.ref.sgm","$in{CORPUS}.src.sgm","$in{CORPUS}.$file");
- $row .= sprintf("<B>%.04f</B>",$nist);
- if ($in{SORT} eq 'NIST') { $sort = $nist; }
- }
- $row .= "</TD>\n";
- if ($no_bleu) {
- $row .= "<TD>";
- if ($file =~ /sgm$/) {
- $row .= sprintf("<B>%.04f</B>",$nist_bleu);
- if ($in{SORT} eq 'BLEU') { $sort = $nist_bleu; }
- }
- $row .= "</TD>\n";
- }
- }
- # multi-bleu
- if ($in{mBLEU} && (scalar keys %MEMORY) && -e "$in{CORPUS}.e") {
- $row .= "<TD>";
- if (!defined($DONTSCORE{$file}) && $file !~ /^f$/ && $file ne "e") {
- my ($score,$p1,$p2,$p3,$p4,$bp) = &get_multi_bleu_score("$in{CORPUS}.f","$in{CORPUS}.e","$in{CORPUS}.$file");
- $row .= sprintf("<B>%.04f</B> %.01f/%.01f/%.01f/%.01f *%.03f",$score,$p1,$p2,$p3,$p4,$bp);
- if ($in{SORT} eq 'mBLEU') { $sort = $score; }
- }
- $row .= "</TD>\n";
- }
-
- my $isSystemOutput = ($file ne 'e' && $file ne 'f' && $file !~ /^pt/);
- # misc stats (note the unknown words should come first so the total word count is available for WER)
- $row .= "<TD align=\"center\">";
- if($file eq 'f') #input
- {
- try
- {
- my ($unknownCount, $totalCount) = calc_unknown_words($corpus, 'surf');
- $row .= sprintf("%.4lf (%d / %d)", $unknownCount / $totalCount, $unknownCount, $totalCount);
- }
- catch Error::Simple with {$row .= "[system error]";};
- }
- $row .= "</TD>\n<TD align=\"center\">";
- if($file eq 'e' || $file eq 'f' || $isSystemOutput)
- {
- try
- {
- my $perplexity = $corpus->calcPerplexity(($file eq 'e') ? 'truth' : (($file eq 'f') ? 'input' : $file), 'surf');
- $row .= sprintf("%.2lf", $perplexity);
- }
- catch Error::Simple with {$row .= "[system error]";}
- }
- $row .= "</TD>\n<TD align=\"center\">";
- if($isSystemOutput)
- {
- try
- {
- my $surfaceWER = $corpus->calcOverallWER($file);
- $row .= sprintf("%.4lf", $surfaceWER);
- }
- catch Error::Simple with {$row .= "[system error]";};
- }
- $row .= "</TD>\n<TD align=\"center\">";
- my ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER);
- if($isSystemOutput)
- {
- try
- {
- ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER) = calc_misc_stats($corpus, $file);
- $row .= sprintf("WER = %.4lg<br>PWER = %.4lg<br><b>ratio = %.3lf</b>", $nnAdjWER, $nnAdjPWER, $nnAdjPWER / $nnAdjWER);
- }
- catch Error::Simple with {$row .= "[system error]";};
- }
- $row .= "</TD>\n<TD align=\"center\">";
- if($isSystemOutput)
- {
- if($surfPWER == -1)
- {
- $row .= "[system error]";
- }
- else
- {
- my ($lemmaBLEU, $p1, $p2, $p3, $p4, $brevity) = $corpus->calcBLEU($file, 'lemma');
- $row .= sprintf("surface = %.3lf<br>lemma = %.3lf<br><b>lemma BLEU = %.04f</b> %.01f/%.01f/%.01f/%.01f *%.03f",
- $surfPWER, $lemmaPWER, $lemmaBLEU, $p1, $p2, $p3, $p4, $brevity);
- }
- }
- $row .= "</TD>\n<TD align=\"center\">";
- if($isSystemOutput)
- {
- try
- {
- my $testInfo = $corpus->statisticallyTestBLEUResults($file, 'surf');
- my @tTestPValues = @{$testInfo->[0]};
- my @confidenceIntervals = @{$testInfo->[1]};
- $row .= "n-gram precision p-values (high p <=> consistent score):<br>t test " . join("/", map {sprintf("%.4lf", $_)} @tTestPValues);
- $row .= "<p>n-gram precision 95% intervals:<br>" . join(",<br>", map {sprintf("[%.4lf - %.4lf]", $_->[0], $_->[1])} @confidenceIntervals);
- my @bleuInterval = (approxBLEUFromNgramScores(map {$_->[0]} @confidenceIntervals), approxBLEUFromNgramScores(map {$_->[1]} @confidenceIntervals));
- $row .= sprintf("<br><b>(BLEU: ~[%.4lf - %.4lf])</b>", $bleuInterval[0], $bleuInterval[1]);
- }
- catch Error::Simple with {$row .= "[system error]";}
- }
- $row .= "</TD>\n";
-
- # correct sentence score
- my($correct,$wrong,$unknown);
- $row .= "<TD>";
- if (!defined($DONTSCORE{$file}) && (scalar keys %MEMORY)) {
- my ($correct,$just_syn,$just_sem,$wrong,$unknown) = &get_score_from_memory("$in{CORPUS}.$FOREIGN",
- "$in{CORPUS}.$file");
- $row .= "<B><FONT COLOR=GREEN>$correct</FONT></B>";
- $row .= "/<FONT COLOR=ORANGE>$just_syn</FONT>";
- $row .= "/<FONT COLOR=ORANGE>$just_sem</FONT>";
- $row .= "/<FONT COLOR=RED>$wrong</FONT> ($unknown)</TD>\n";
- if ($in{SORT} eq 'SCORE') {
- $sort = sprintf("%03d %04d",$correct,$just_syn+$just_sem);
- }
- }
- else
- {
- $row .= "</TD>\n";
- }
-
- $row .= "</TR>\n";
- push @TABLE, "<!-- $sort -->\n$row";
- }
- close(DIR);
- foreach (reverse sort @TABLE) { print $_; }
- print "</TABLE>\n";
- print "<INPUT TYPE=SUBMIT VALUE=\"Compare\">\n";
- print "<INPUT TYPE=CHECKBOX NAME=SURFACE VALUE=1 CHECKED> Compare all different sentences (instead of just differently <I>evaluated</I> sentences) <INPUT TYPE=CHECKBOX NAME=WITH_EVAL VALUE=1 CHECKED> with evaluation</FORM><P>\n";
- print "<P>The score is to be read as: <FONT COLOR=GREEN>correct</FONT>/<FONT COLOR=ORANGE>just-syn-correct</FONT>/<FONT COLOR=ORANGE>just-sem-correct</FONT>/<FONT COLOR=RED>wrong</FONT> (unscored)\n";
- print "<BR>IBM BLEU is to be read as: <B>metric</B> unigram/bigram/trigram/quadgram *brevity-penalty<P>";
- print "<DIV STYLE=\"border: 1px solid #006600\">";
- print "<H2>Comparison of System Translations (p-values)</H2>";
- my @sysnames = $corpus->getSystemNames();
- for(my $i = 0; $i < scalar(@sysnames); $i++)
- {
- for(my $j = $i + 1; $j < scalar(@sysnames); $j++)
- {
- my $comparison = $corpus->statisticallyCompareSystemResults($sysnames[$i], $sysnames[$j], 'surf');
- print "<P><FONT COLOR=#00aa22>" . $sysnames[$i] . " vs. " . $sysnames[$j] . "</FONT>: [<I>t</I> test] ";
- for(my $k = 0; $k < scalar(@{$comparison->[0]}); $k++)
- {
- print sprintf(($k == 0) ? "%.4lg" : "; %.4lg ", $comparison->[0]->[$k]);
- if($comparison->[1]->[$k] == 0) {print "(&larr;)";} else {print "(&rarr;)";}
- }
- print "&nbsp;&nbsp;---&nbsp;&nbsp;[sign test] ";
- for(my $k = 0; $k < scalar(@{$comparison->[2]}); $k++)
- {
- print sprintf(($k == 0) ? "%.4lg " : "; %.4lg ", $comparison->[2]->[$k]);
- if($comparison->[3]->[$k] == 0) {print "(&larr;)";} else {print "(&rarr;)";}
- }
- print "\n";
- }
- }
-  print "</DIV>\n";
- print "<P><A HREF=\"newsmtgui.cgi?action=\">All corpora</A>\n";
-}
-
-###### SCORE TRANSLATIONS
-
-sub score_file {
- if ($in{VIEW}) {
- &htmlhead("View Translations");
- }
- else {
- &htmlhead("Score Translations");
- }
- print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
- print "<FORM ACTION=\"\" METHOD=POST>\n";
- print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
- print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
- print "<INPUT TYPE=HIDDEN NAME=FILE VALUE=\"$in{FILE}\">\n";
-
- # get sentences
- my @SENTENCES;
- if ($in{FILE} =~ /.sgm$/) {
- @SENTENCES = `grep '<seg' $in{CORPUS}.$in{FILE}`;
-    for(my $i=0;$i<=$#SENTENCES;$i++) {
- $SENTENCES[$i] =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
- }
- }
- else {
- @SENTENCES = `cat $in{CORPUS}.$in{FILE}`; chop(@SENTENCES);
- }
-
- my %REFERENCE;
- foreach (@SHOW) {
- if (-e "$in{CORPUS}.$_") {
- @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}});
- }
- }
-
- # update memory
- foreach (keys %in) {
- next unless /^SYN_SCORE_(\d+)$/;
- next unless $in{"SEM_SCORE_$1"};
- &store_in_memory($REFERENCE{$FOREIGN}[$1],
- $SENTENCES[$1],
- "syn_".$in{"SYN_SCORE_$1"}." sem_".$in{"SEM_SCORE_$1"});
- }
-
- # display sentences
- for(my $i=0;$i<=$#SENTENCES;$i++) {
- my $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES[$i]);
- next if ($in{ACTION} eq 'SCORE_FILE' &&
- ! $in{VIEW} &&
- $evaluation ne '' && $evaluation ne 'wrong');
- print "<P>Sentence ".($i+1).":<BR>\n";
- # color coding
- &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES[$i]),$REFERENCE{"e"}[$i]);
- if (%MULTI_REF) {
- foreach my $sysid (keys %MULTI_REF) {
- print "<FONT COLOR=GREEN>".$MULTI_REF{$sysid}[$i]."</FONT> (Reference $sysid)<BR>\n";
- }
- }
-
- # all sentences
- print "$SENTENCES[$i] (System output)<BR>\n";
- foreach my $ref (@SHOW) {
- if (-e "$in{CORPUS}.$ref") {
- print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n" if $REFERENCE{$ref}[$i];
- }
- }
- if (! $in{VIEW}) {
- print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$i VALUE=correct";
- print " CHECKED" if ($evaluation =~ /syn_correct/);
- print "> perfect English\n";
- print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$i VALUE=wrong";
- print " CHECKED" if ($evaluation =~ /syn_wrong/);
- print "> imperfect English<BR>\n";
- print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$i VALUE=correct";
- print " CHECKED" if ($evaluation =~ /sem_correct/);
- print "> correct meaning\n";
- print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$i VALUE=wrong";
- print " CHECKED" if ($evaluation =~ /sem_wrong/);
- print "> incorrect meaning\n";
- }
- }
- if (! $in{VIEW}) {
- print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
- print "</FORM>\n";
- }
-}
-
-sub color_highlight_ngrams {
- my($i,$sentence,$single_reference) = @_;
- my @REF = ();
- my %NGRAM = ();
- if (%MULTI_REF) {
- foreach my $sysid (keys %MULTI_REF) {
- push @REF,&nist_normalize_text($MULTI_REF{$sysid}[$i]);
- }
- }
- elsif ($single_reference) {
- @REF = ($single_reference);
- }
- if (@REF) {
- foreach my $ref (@REF) {
- my @WORD = split(/\s+/,$ref);
- for(my $n=1;$n<=4;$n++) {
- for(my $w=0;$w<=$#WORD-($n-1);$w++) {
- my $ngram = "$n: ";
- for(my $j=0;$j<$n;$j++) {
- $ngram .= $WORD[$w+$j]." ";
- }
- $NGRAM{$ngram}++;
- }
- }
- }
- $sentence =~ s/^\s+//;
- $sentence =~ s/\s+/ /;
- $sentence =~ s/\s+$//;
- my @WORD = split(/\s+/,$sentence);
- my @CORRECT;
- for(my $w=0;$w<=$#WORD;$w++) {
- $CORRECT[$w] = 0;
- }
- for(my $n=1;$n<=4;$n++) {
- for(my $w=0;$w<=$#WORD-($n-1);$w++) {
- my $ngram = "$n: ";
- for(my $j=0;$j<$n;$j++) {
- $ngram .= $WORD[$w+$j]." ";
- }
- next unless defined($NGRAM{$ngram}) && $NGRAM{$ngram}>0;
- $NGRAM{$ngram}--;
- for(my $j=0;$j<$n;$j++) {
- $CORRECT[$w+$j] = $n;
- }
- }
- }
- my @COLOR;
- $COLOR[0] = "#FF0000";
- $COLOR[1] = "#C000C0";
- $COLOR[2] = "#0000FF";
- $COLOR[3] = "#00C0C0";
- $COLOR[4] = "#00C000";
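-    # $CORRECT[$w] holds the largest n (0..4) for which word $w falls inside a
-    # clipped n-gram match against the reference(s); the colours above therefore
-    # run from red (no match) through purple/blue/teal to green (part of a matching 4-gram).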
- for(my $w=0;$w<=$#WORD;$w++) {
- print "<B><FONT COLOR=".$COLOR[$CORRECT[$w]].">$WORD[$w]<SUB>".$CORRECT[$w]."</SUB></FONT></B> ";
- }
- print "\n<BR>";
- }
-}
-
-###### OTHER STATS
-
-#print (in some unspecified way) the offending exception of type Error::Simple
-#arguments: the error object, a context string
-#return: none
-sub printError
-{
- my ($err, $context) = @_;
- warn "$context: " . $err->{'-text'} . " @ " . $err->{'-file'} . " (" .$err->{'-line'} . ")\n";
-}
-
-#compute number and percentage of unknown tokens for a given factor in foreign corpus
-#arguments: corpus object ref, factor name
-#return (unkwordCount, totalWordCount), or (-1, -1) if an error occurs
-sub calc_unknown_words
-{
- my ($corpus, $factorName) = @_;
- try
- {
- my ($unknownCount, $totalCount) = $corpus->calcUnknownTokens($factorName);
- return ($unknownCount, $totalCount);
- }
- catch Error::Simple with
- {
- my $err = shift;
- printError($err, 'calc_unknown_words()');
- return (-1, -1);
- };
-}
-
-#compute (if we have the necessary factors) info for:
-#- diff btwn wer and pwer for NNs & ADJs -- if large, many reordering errors
-#- diff btwn pwer for surface forms and pwer for lemmas -- if large, morphology errors
-#arguments: corpus object, system name
-#return (NN/ADJ (wer, pwer), surf pwer, lemma pwer), or (-1, -1, -1, -1) if an error occurs
-sub calc_misc_stats
-{
- my ($corpus, $sysname) = @_;
- try
- {
- my ($nnAdjWER, $nnAdjPWER) = $corpus->calcNounAdjWER_PWERDiff($sysname);
- my ($surfPWER, $lemmaPWER) = ($corpus->calcOverallPWER($sysname, 'surf'), $corpus->calcOverallPWER($sysname, 'lemma'));
- return ($nnAdjWER, $nnAdjPWER, $surfPWER, $lemmaPWER);
- }
- catch Error::Simple with
- {
- my $err = shift;
- printError($err, 'calc_misc_stats()');
- return (-1, -1, -1, -1);
- };
-}
-
-#approximate BLEU score from n-gram precisions (currently assume no length penalty)
-#arguments: n-gram precisions as an array
-#return: BLEU score
-sub approxBLEUFromNgramScores
-{
- my $logsum = 0;
- foreach my $p (@_) {$logsum += log($p);}
- return exp($logsum / scalar(@_));
-}
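-# Illustrative example (values are made up): approxBLEUFromNgramScores(0.6, 0.4, 0.3, 0.2)
-# returns exp((log 0.6 + log 0.4 + log 0.3 + log 0.2) / 4) ~= 0.346, i.e. the
-# geometric mean of the four n-gram precisions.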
-
-###### NIST SCORE
-
-sub get_nist_score {
- my($reference_file,$source_file,$translation_file) = @_;
- my @STAT = stat($translation_file);
- my $current_timestamp = $STAT[9];
- foreach (@NIST) {
- my ($file,$time,$nist,$bleu) = split;
- return ($nist,$bleu)
- if ($file eq $translation_file && $current_timestamp == $time);
- }
-
- my $nist_eval = `/home/pkoehn/statmt/bin/mteval-v10.pl -c -r $reference_file -s $source_file -t $translation_file`;
- return (0,0) unless ($nist_eval =~ /NIST score = (\d+\.\d+) BLEU score = (\d+\.\d+)/i);
-
- open(NIST,">>nist-memory.dat");
- printf NIST "$translation_file $current_timestamp %f %f\n",$1,$2;
- close(NIST);
- return ($1,$2);
-}
-
-sub nist_normalize_text {
- my ($norm_text) = @_;
-
-# language-independent part:
- $norm_text =~ s/<skipped>//g; # strip "skipped" tags
- $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines
- $norm_text =~ s/\n/ /g; # join lines
- $norm_text =~ s/(\d)\s+(\d)/$1$2/g; #join digits
- $norm_text =~ s/&quot;/"/g; # convert SGML tag for quote to "
- $norm_text =~ s/&amp;/&/g; # convert SGML tag for ampersand to &
-    $norm_text =~ s/&lt;/</g;    # convert SGML tag for less-than to <
-    $norm_text =~ s/&gt;/>/g;    # convert SGML tag for greater-than to >
-
-# language-dependent part (assuming Western languages):
- $norm_text = " $norm_text ";
-# $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case;
- $norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation
- $norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit
- $norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit
- $norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit
- $norm_text =~ s/\s+/ /g; # one space only between words
- $norm_text =~ s/^\s+//; # no leading space
- $norm_text =~ s/\s+$//; # no trailing space
-
- return $norm_text;
-}
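-# Illustrative example (input is made up): nist_normalize_text('&quot;Hello, world!&quot;')
-# yields '" Hello , world ! "' -- SGML entities are expanded and punctuation is
-# split off into separate tokens before n-gram matching.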
-
-###### BLEU SCORE
-
-sub get_multi_bleu_score {
- my($foreign_file,$reference_file,$translation_file) = @_;
- my @STAT = stat($translation_file);
- my $current_timestamp = $STAT[9];
- foreach (@mBLEU) {
- my ($file,$time,$score,$g1,$g2,$g3,$g4,$bp) = split;
- if ($file eq $translation_file && $current_timestamp == $time) {
- return ($score,$g1*100,$g2*100,$g3*100,$g4*100,$bp);
- }
- }
-
- # load reference translation from reference file
- my @REFERENCE_SENTENCE = `cat $reference_file`; chop(@REFERENCE_SENTENCE);
- my @TRANSLATION_SENTENCE = `cat $translation_file`; chop(@TRANSLATION_SENTENCE);
- my %REF;
- my @FOREIGN_SENTENCE = `cat $foreign_file`; chop(@FOREIGN_SENTENCE);
- for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) {
- push @{$REF{$FOREIGN_SENTENCE[$i]}},$REFERENCE_SENTENCE[$i];
- }
- # load reference translation from translation memory
- foreach my $memory (keys %MEMORY) {
- next if $MEMORY{$memory} ne 'syn_correct sem_correct';
- my ($foreign,$english) = split(/ .o0O0o. /,$memory);
- next unless defined($REF{$foreign});
- push @{$REF{$foreign}},$english;
- }
- my(@CORRECT,@TOTAL,$length_translation,$length_reference);
- # compute bleu
- for(my $i=0;$i<=$#TRANSLATION_SENTENCE;$i++) {
- my %REF_NGRAM = ();
- my @WORD = split(/ /,$TRANSLATION_SENTENCE[$i]);
- my $length_translation_this_sentence = scalar(@WORD);
- my ($closest_diff,$closest_length) = (9999,9999);
- foreach my $reference (@{$REF{$FOREIGN_SENTENCE[$i]}}) {
- my @WORD = split(/ /,$reference);
- my $length = scalar(@WORD);
- if (abs($length_translation_this_sentence-$length) < $closest_diff) {
- $closest_diff = abs($length_translation_this_sentence-$length);
- $closest_length = $length;
- }
- for(my $n=1;$n<=4;$n++) {
- my %REF_NGRAM_N = ();
- for(my $start=0;$start<=$#WORD-($n-1);$start++) {
- my $ngram = "$n";
- for(my $w=0;$w<$n;$w++) {
- $ngram .= " ".$WORD[$start+$w];
- }
- $REF_NGRAM_N{$ngram}++;
- }
- foreach my $ngram (keys %REF_NGRAM_N) {
- if (!defined($REF_NGRAM{$ngram}) ||
- $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
- $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
- }
- }
- }
- }
- $length_translation += $length_translation_this_sentence;
- $length_reference += $closest_length;
- for(my $n=1;$n<=4;$n++) {
- my %T_NGRAM = ();
- for(my $start=0;$start<=$#WORD-($n-1);$start++) {
- my $ngram = "$n";
- for(my $w=0;$w<$n;$w++) {
- $ngram .= " ".$WORD[$start+$w];
- }
- $T_NGRAM{$ngram}++;
- }
- foreach my $ngram (keys %T_NGRAM) {
- my $n = 0+$ngram;
-# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
- $TOTAL[$n] += $T_NGRAM{$ngram};
- if (defined($REF_NGRAM{$ngram})) {
- if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
- $CORRECT[$n] += $T_NGRAM{$ngram};
-# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
- }
- else {
- $CORRECT[$n] += $REF_NGRAM{$ngram};
-# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
- }
- }
- }
- }
- }
- my $brevity_penalty = 1;
- if ($length_translation<$length_reference) {
- $brevity_penalty = exp(1-$length_reference/$length_translation);
- }
- my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
- my_log( $CORRECT[2]/$TOTAL[2] ) +
- my_log( $CORRECT[3]/$TOTAL[3] ) +
- my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
-
- open(BLEU,">>mbleu-memory.dat");
- @STAT = stat($translation_file);
- printf BLEU "$translation_file $STAT[9] %f %f %f %f %f %f\n",$bleu,$CORRECT[1]/$TOTAL[1],$CORRECT[2]/$TOTAL[2],$CORRECT[3]/$TOTAL[3],$CORRECT[4]/$TOTAL[4],$brevity_penalty;
- close(BLEU);
-
- return ($bleu,
- 100*$CORRECT[1]/$TOTAL[1],
- 100*$CORRECT[2]/$TOTAL[2],
- 100*$CORRECT[3]/$TOTAL[3],
- 100*$CORRECT[4]/$TOTAL[4],
- $brevity_penalty);
-}
-
-sub my_log {
- return -9999999999 unless $_[0];
- return log($_[0]);
-}
-
-
-###### SCORE TRANSLATIONS
-
-################################ IN PROGRESS ###############################
-sub compare2
-{
- &htmlhead("Compare Translations");
- print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
- print "<FORM ACTION=\"\" METHOD=POST>\n";
- print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
- print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
- my $corpus = new Corpus('-name' => "$in{CORPUS}", '-descriptions' => \%FILEDESC, '-info_line' => $factorData{$in{CORPUS}});
- $corpus->writeComparisonPage(\*STDOUT, /^.*$/);
- print "</FORM>\n";
-}
-
-sub compare {
- &htmlhead("Compare Translations");
- print "<A HREF=\"?ACTION=VIEW_CORPUS&CORPUS=".CGI::escape($in{CORPUS})."\">View Corpus $in{CORPUS}</A><P>\n";
- print "<FORM ACTION=\"\" METHOD=POST>\n";
- print "<INPUT TYPE=HIDDEN NAME=ACTION VALUE=$in{ACTION}>\n";
- print "<INPUT TYPE=HIDDEN NAME=CORPUS VALUE=\"$in{CORPUS}\">\n";
-
- # get sentences
- my %SENTENCES;
- my $sentence_count;
- foreach (keys %in) {
- if (/^FILE_(.+)$/) {
- my $file = $1;
- print "<INPUT TYPE=HIDDEN NAME=\"$file\" VALUE=1>\n";
- my @SENTENCES;
- if ($file =~ /.sgm$/) {
- @{$SENTENCES{$file}} = `grep '<seg' $in{CORPUS}.$file`;
-        for(my $i=0;$i<=$#{$SENTENCES{$file}};$i++) {
- $SENTENCES{$file}[$i] =~ s/^<seg[^>]+> *(\S.+\S) *<\/seg> *$/$1/;
- }
- }
- else {
-        @{$SENTENCES{$file}} = `cat $in{CORPUS}.$file`;
- chop(@{$SENTENCES{$file}});
- }
-
- $sentence_count = scalar @{$SENTENCES{$file}};
- }
- }
- my %REFERENCE;
- foreach (@SHOW) {
- if (-e "$in{CORPUS}.$_") {
- @{$REFERENCE{$_}} = `cat $in{CORPUS}.$_`; chop(@{$REFERENCE{$_}});
- }
- }
-
- # update memory
- foreach (keys %in) {
- next unless /^SYN_SCORE_(.+)_(\d+)$/;
- next unless $in{"SEM_SCORE_$1_$2"};
- &store_in_memory($REFERENCE{$FOREIGN}[$2],
- $SENTENCES{$1}[$2],
- "syn_".$in{"SYN_SCORE_$1_$2"}." sem_".$in{"SEM_SCORE_$1_$2"});
- }
-
- # display sentences
- for(my $i=0;$i<$sentence_count;$i++)
- {
- my $evaluation = "";
- my $show = 0;
- my $surface = "";
- foreach my $file (keys %SENTENCES)
- {
- if ($in{SURFACE}) {
- $SENTENCES{$file}[$i] =~ s/ *$//;
- $surface = $SENTENCES{$file}[$i] if ($surface eq '');
- $show = 1 if ($SENTENCES{$file}[$i] ne $surface);
- }
- else {
- my $this_ev = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
- $this_ev = "syn_wrong sem_wrong" unless $this_ev;
- $evaluation = $this_ev if ($evaluation eq '');
- $show = 1 if ($evaluation ne $this_ev);
- }
- }
- next unless $show;
- print "<HR>Sentence ".($i+1).":<BR>\n";
- foreach my $ref (@SHOW) {
- if (-e "$in{CORPUS}.$ref") {
- print "<FONT COLOR=$SHOW_COLOR{$ref}>".$REFERENCE{$ref}[$i]."</FONT> (".$FILETYPE{$ref}.")<BR>\n";
- }
- }
- foreach my $file (keys %SENTENCES) {
- print "<B>$SENTENCES{$file}[$i]</B> ($file)<BR>\n";
- &color_highlight_ngrams($i,&nist_normalize_text($SENTENCES{$file}[$i]),$REFERENCE{"e"}[$i]);
- if (0 && $in{WITH_EVAL}) {
- $evaluation = &get_from_memory($REFERENCE{$FOREIGN}[$i],$SENTENCES{$file}[$i]);
- print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=correct";
- print " CHECKED" if ($evaluation =~ /syn_correct/);
- print "> perfect English\n";
- print "<INPUT TYPE=RADIO NAME=SYN_SCORE_$file"."_$i VALUE=wrong";
- print " CHECKED" if ($evaluation =~ /syn_wrong/);
- print "> imperfect English<BR>\n";
- print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=correct";
- print " CHECKED" if ($evaluation =~ /sem_correct/);
- print "> correct meaning\n";
- print "<INPUT TYPE=RADIO NAME=SEM_SCORE_$file"."_$i VALUE=wrong";
- print " CHECKED" if ($evaluation =~ /sem_wrong/);
- print "> incorrect meaning<BR>\n";
- }
- }
- }
- print "<P><INPUT TYPE=SUBMIT VALUE=\"Add evaluation\">\n";
- print "</FORM>\n";
-}
-
-###### MEMORY SUBS
-
-sub load_memory {
- open(MEMORY,"evaluation-memory.dat") or return;
- while(<MEMORY>) {
- chop;
- my($foreign,$translation,$evaluation) = split(/ \.o0O0o\. /);
- $evaluation = 'syn_correct sem_correct' if ($evaluation eq 'correct');
- $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
- }
- close(MEMORY);
-}
-
-sub get_score_from_memory {
- my($foreign_file,$translation_file) = @_;
- my $unknown=0;
- my $correct=0;
- my $just_syn=0;
- my $just_sem=0;
- my $wrong=0;
- my @FOREIGN = `cat $foreign_file`; chop(@FOREIGN);
- my @TRANSLATION = `cat $translation_file`; chop(@TRANSLATION);
- for(my $i=0;$i<=$#FOREIGN;$i++) {
- if (my $evaluation = &get_from_memory($FOREIGN[$i],$TRANSLATION[$i])) {
- if ($evaluation eq 'syn_correct sem_correct') { $correct++ }
- elsif ($evaluation eq 'syn_correct sem_wrong') { $just_syn++ }
- elsif ($evaluation eq 'syn_wrong sem_correct') { $just_sem++ }
- elsif ($evaluation eq 'syn_wrong sem_wrong') { $wrong++ }
- else { $unknown++; }
- }
- else { $unknown++; }
- }
- return($correct,$just_syn,$just_sem,$wrong,$unknown);
-}
-
-sub store_in_memory {
- my($foreign,$translation,$evaluation) = @_;
- &trim(\$translation);
- return if $MEMORY{"$foreign .o0O0o. $translation"} eq $evaluation;
- $MEMORY{"$foreign .o0O0o. $translation"} = $evaluation;
- open(MEMORY,">>evaluation-memory.dat") or die "store_in_memory(): couldn't open 'evaluation-memory.dat' for append\n";
- print MEMORY "$foreign .o0O0o. $translation .o0O0o. $evaluation\n";
- close(MEMORY);
-}
-
-sub get_from_memory {
- my($foreign,$translation) = @_;
- &trim(\$translation);
- return $MEMORY{"$foreign .o0O0o. $translation"};
-}
-
-sub trim {
- my($translation) = @_;
- $$translation =~ s/ +/ /g;
- $$translation =~ s/^ +//;
- $$translation =~ s/ +$//;
-}
-
-sub load_descriptions {
- open(FD,"file-descriptions") or die "load_descriptions(): couldn't open 'file-descriptions' for read\n";
- while(<FD>) {
- my($file,$description) = split(/\s+/,$_,2);
- $FILEDESC{$file} = $description;
- }
- close(FD);
-}
-
-#read config file giving various corpus config info
-#arguments: filename to read
-#return: hash of corpus names to strings containing formatted info
-sub loadFactorData
-{
- my $filename = shift;
- my %data = ();
- open(INFILE, "<$filename") or die "loadFactorData(): couldn't open '$filename' for read\n";
- while(my $line = <INFILE>)
- {
- if($line =~ /^\#/) {next;} #skip comment lines
- $line =~ /^\s*(\S+)\s*:\s*(\S.*\S)\s*$/;
- my $corpusName = $1;
- $data{$corpusName} = $2;
- }
- close(INFILE);
- return %data;
-}
-
-###### SUBS
-
-sub htmlhead {
- print <<"___ENDHTML";
-Content-type: text/html
-
-<HTML><HEAD>
-<TITLE>MTEval: $_[0]</TITLE>
-<SCRIPT LANGUAGE="JavaScript">
-
-<!-- hide from old browsers
-
-function FieldInfo(field,description) {
- popup = window.open("","popDialog","height=500,width=600,scrollbars=yes,resizable=yes");
- popup.document.write("<HTML><HEAD><TITLE>"+field+"</TITLE></HEAD><BODY BGCOLOR=#FFFFCC><CENTER><B>"+field+"</B><HR SIZE=2 NOSHADE></CENTER><PRE>"+description+"</PRE><CENTER><FORM><INPUT TYPE='BUTTON' VALUE='Okay' onClick='self.close()'></FORM><CENTER></BODY></HTML>");
- popup.focus();
- popup.document.close();
-}
-
-<!-- done hiding -->
-
-</SCRIPT>
-</HEAD>
-<BODY BGCOLOR=white>
-<H2>Evaluation Tool for Machine Translation<BR>$_[0]</H2>
-___ENDHTML
-}
-
-
-############################# parts of cgi-lib.pl
-
-
-sub ReadParse {
- my ($i, $key, $val);
-
- # Read in text
- my $in;
- if (&MethGet) {
- $in = $ENV{'QUERY_STRING'};
- } elsif (&MethPost) {
- read(STDIN,$in,$ENV{'CONTENT_LENGTH'});
- }
-
- my @in = split(/[&;]/,$in);
-
- foreach $i (0 .. $#in) {
- # Convert plus's to spaces
- $in[$i] =~ s/\+/ /g;
-
- # Split into key and value.
- ($key, $val) = split(/=/,$in[$i],2); # splits on the first =.
-
- # Convert %XX from hex numbers to alphanumeric
- $key =~ s/%(..)/pack("c",hex($1))/ge;
- $val =~ s/%(..)/pack("c",hex($1))/ge;
-
- # Associate key and value
- $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
- $in{$key} .= $val;
-
- }
-
- return scalar(@in);
-}
-
-sub MethGet {
- return ($ENV{'REQUEST_METHOD'} eq "GET");
-}
-
-sub MethPost {
- return ($ENV{'REQUEST_METHOD'} eq "POST");
-}
diff --git a/scripts/generic/extract-factors.pl b/scripts/generic/extract-factors.pl
deleted file mode 100755
index b3eb998b8..000000000
--- a/scripts/generic/extract-factors.pl
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/perl -w
-
-#extract-factors.pl: extract only the desired factors from a factored corpus
-#usage: extract-factors corpusfile factor-index factor-index ... > outfile
-#factor indices start at 0
-#factor indices that are too large are ignored
-
-use strict;
-
-my ($filename, @factors) = @ARGV;
-my %indices = map {$_ => 1} @factors;
-
-open(INFILE, "<$filename") or die "couldn't open '$filename' for read: $!\n";
-while(my $line = <INFILE>)
-{
- chop $line;
- print join(' ', map {my $i = 0; join('|', grep($indices{$i++}, split(/\|/, $_)))} split(/\s+/, $line)) . "\n";
-}
-close(INFILE);
diff --git a/scripts/generic/lopar2pos.pl b/scripts/generic/lopar2pos.pl
deleted file mode 100755
index 9b97bc806..000000000
--- a/scripts/generic/lopar2pos.pl
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/perl -w
-
-#lopar2pos: extract POSs from LOPAR output
-#usage: lopar2pos.pl CORPUS.lopar > CORPUS.pos
-
-my $infilename = shift @ARGV;
-open(INFILE, "<$infilename") or die "couldn't open '$infilename' for read: $!\n";
-while(my $line = <INFILE>)
-{
- my @words = split(/\s+/, $line);
- my @tags = map {$_ =~ /^[^_]*_([A-Z]+)/; $1} @words;
- print join(' ', @tags) . "\n";
-}
-close(INFILE);
diff --git a/scripts/generic/moses-parallel.pl b/scripts/generic/moses-parallel.pl
deleted file mode 100755
index 2a809c653..000000000
--- a/scripts/generic/moses-parallel.pl
+++ /dev/null
@@ -1,439 +0,0 @@
-#! /usr/bin/perl
-
-#######################
-# Revision history
-#
-# 02 Aug 2006 added strict requirement
-# 01 Aug 2006 fix bug about inputfile parameter
-# fix bug about suffix index generation
-# 31 Jul 2006 added parameter for reading queue parameters
-# 29 Jul 2006   added code to handle confusion networks
-# 28 Jul 2006 added a better policy for removing jobs from the queue in case of killing signal (CTRL-C)
-# added the parameter -qsub-prefix which sets the prefix for the name of submitted jobs
-# 27 Jul 2006   added safesystem() function and other checks to handle process failure
-# added checks for existence of decoder and configuration file
-# 26 Jul 2006 fix a bug related to the use of absolute path for srcfile and nbestfile
-
-use strict;
-
-#######################
-#Customizable parameters
-
-#parameters for submitting processes through SGE
-#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
-my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";
-
-my $workingdir=$ENV{PWD};
-my $tmpdir="$workingdir/tmp$$";
-my $splitpfx="split$$";
-
-$SIG{'INT'} = \&kill_all_and_quit; # catch CTRL-C (SIGINT)
-
-#######################
-#Default parameters
-my $jobscript="$workingdir/job$$";
-my $qsubout="$workingdir/out.job$$";
-my $qsuberr="$workingdir/err.job$$";
-
-
-my $mosesparameters="";
-my $cfgfile=""; #configuration file
-
-my $version=undef;
-my $help=0;
-my $dbg=0;
-my $jobs=4;
-my $mosescmd="$ENV{MOSESBIN}/moses"; #decoder in use
-my $orifile=undef;
-my $testfile=undef;
-my $nbestfile=undef;
-my $orinbestfile=undef;
-my $nbest=undef;
-my $nbestflag=0;
-my $qsubname="MOSES";
-my $inputtype=0;
-
-#######################
-# Command line options processing
-sub init(){
- use Getopt::Long qw(:config pass_through);
- GetOptions('version'=>\$version,
- 'help'=>\$help,
- 'debug'=>\$dbg,
- 'jobs=i'=>\$jobs,
- 'decoder=s'=> \$mosescmd,
- 'i|inputfile|input-file=s'=> \$orifile,
- 'n-best-file=s'=> \$orinbestfile,
- 'n-best-size=i'=> \$nbest,
- 'qsub-prefix=s'=> \$qsubname,
- 'queue-parameters=s'=> \$queueparameters,
- 'inputtype=i'=> \$inputtype,
- 'config=s'=>\$cfgfile
- ) or exit(1);
-
- chomp($nbestfile=`basename $orinbestfile`) if defined $orinbestfile;
- chomp($testfile=`basename $orifile`) if defined $orifile;
-
- $mosesparameters="@ARGV -config $cfgfile -inputtype $inputtype";
- getNbestParameters();
-}
-
-
-#######################
-##print version
-sub version(){
-# print STDERR "version 1.0 (15-07-2006)\n";
-# print STDERR "version 1.1 (17-07-2006)\n";
-# print STDERR "version 1.2 (18-07-2006)\n";
-# print STDERR "version 1.3 (21-07-2006)\n";
-# print STDERR "version 1.4 (26-07-2006)\n";
-# print STDERR "version 1.5 (27-07-2006)\n";
-# print STDERR "version 1.6 (28-07-2006)\n";
-# print STDERR "version 1.7 (29-07-2006)\n";
-# print STDERR "version 1.8 (31-07-2006)\n";
-# print STDERR "version 1.9 (01-08-2006)\n";
- print STDERR "version 1.10 (02-08-2006)\n";
- exit(1);
-}
-
-#usage
-sub usage(){
- print STDERR "moses-parallel.pl [parallel-options] [moses-options]\n";
- print STDERR "Options marked (*) are required.\n";
- print STDERR "Parallel options:\n";
- print STDERR "* -decoder <file> Moses decoder to use\n";
-  print STDERR "* -i|inputfile|input-file <file> the input text to translate\n";
- print STDERR "* -jobs <N> number of required jobs\n";
-  print STDERR "  -qsub-prefix <string> name for submitted jobs\n";
- print STDERR " -queue-parameters <string> specific requirements for queue\n";
- print STDERR " -debug debug\n";
- print STDERR " -version print version of the script\n";
- print STDERR " -help this help\n";
- print STDERR "Moses options:\n";
- print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n";
- print STDERR "* -config <cfgfile> configuration file\n";
- print STDERR "All other options are passed to Moses\n";
- exit(1);
-}
-
-#printparameters
-sub print_parameters(){
- print STDERR "Inputfile: $orifile\n";
- print STDERR "Configuration file: $cfgfile\n";
- print STDERR "Decoder in use: $mosescmd\n";
- if ($nbestflag) {
- print STDERR "Nbest file: $orinbestfile\n";
- print STDERR "Nbest size: $nbest\n";
- }
-  print STDERR "Number of jobs: $jobs\n";
- print STDERR "Qsub name: $qsubname\n";
- print STDERR "Queue parameters: $queueparameters\n";
- print STDERR "Inputtype: text\n" if $inputtype == 0;
- print STDERR "Inputtype: confusion network\n" if $inputtype == 1;
-
- print STDERR "parameters directly passed to Moses: $mosesparameters\n";
-}
-
-#get parameters for nbest computation from configuration file
-sub getNbestParameters(){
- if ($orinbestfile) { $nbestflag=1; }
- else{
- open (CFG, "$cfgfile");
- while (chomp($_=<CFG>)){
- if (/^\[n-best-list\]/){
- chomp($orinbestfile=<CFG>);
- chomp($nbest=<CFG>);
- $nbestflag=1;
- last;
- }
- }
- close(CFG);
- }
-}
-
-#######################
-#Script starts here
-
-init();
-
-version() if $version;
-usage() if $help;
-
-
-if (!defined $orifile || !defined $mosescmd || ! defined $cfgfile) {
- print STDERR "Please specify -inputfile, -decoder and -config\n";
- usage();
-}
-
-#checking if inputfile exists
-if (! -e ${orifile} ){
-  print STDERR "Inputfile ($orifile) does not exist\n";
- usage();
-}
-
-#checking if decoder exists
-if (! -e $mosescmd) {
-  print STDERR "Decoder ($mosescmd) does not exist\n";
- usage();
-}
-
-#checking if configfile exists
-if (! -e $cfgfile) {
-  print STDERR "Configuration file ($cfgfile) does not exist\n";
- usage();
-}
-
-
-print_parameters(); # so that people know
-exit(1) if $dbg; # debug mode: just print and do not run
-
-
-#splitting test file in several parts
-#$decimal="-d"; #split does not accept this option (on MAC OS)
-my $decimal="";
-
-my $cmd;
-my $sentenceN;
-my $splitN;
-
-my @idxlist=();
-
-if ($inputtype==0){ #text input
-#getting the number of input sentences
- chomp($sentenceN=`wc -l ${orifile} | awk '{print \$1}' `);
-
-#Reducing the number of jobs if there are fewer sentences to translate
- if ($jobs>$sentenceN){ $jobs=$sentenceN; }
-
-#Computing the number of sentences for each file
- if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
- else{ $splitN=int($sentenceN /$jobs) + 1; }
-
- if ($dbg){
- print STDERR "There are $sentenceN sentences to translate\n";
- print STDERR "There are at most $splitN sentences per job\n";
- }
-
- $cmd="split $decimal -a 2 -l $splitN $orifile ${testfile}.$splitpfx-";
- safesystem("$cmd") or die;
-}
-else{ #confusion network input
- my $tmpfile="/tmp/cnsplit$$";
- $cmd="cat $orifile | perl -pe 's/\\n/ _CNendline_ /g;' | perl -pe 's/_CNendline_ _CNendline_ /_CNendline_\\n/g;' > $tmpfile";
- safesystem("$cmd") or die;
-
-#getting the number of input CNs
- chomp($sentenceN=`wc -l $tmpfile | awk '{print \$1}' `);
-
-#Reducing the number of jobs if there are fewer CNs to translate
- if ($jobs>$sentenceN){ $jobs=$sentenceN; }
-
-#Computing the number of CNs for each file
- if ($sentenceN % $jobs == 0){ $splitN=int($sentenceN / $jobs); }
- else{ $splitN=int($sentenceN /$jobs) + 1; }
-
- if ($dbg){
- print STDERR "There are $sentenceN confusion networks to translate\n";
- print STDERR "There are at most $splitN sentences per job\n";
- }
-
- $cmd="split $decimal -a 2 -l $splitN $tmpfile $tmpfile-";
- safesystem("$cmd") or die;
-
- my @idxlist=();
- chomp(@idxlist=`ls $tmpfile-*`);
- grep(s/.+(\-\S+)$/$1/e,@idxlist);
-
- foreach my $idx (@idxlist){
- $cmd="perl -pe 's/ _CNendline_ /\\n/g;s/ _CNendline_/\\n/g;'";
- safesystem("cat $tmpfile$idx | $cmd > ${testfile}.$splitpfx$idx ; rm $tmpfile$idx;");
- }
-}
-
-chomp(@idxlist=`ls ${testfile}.$splitpfx-*`);
-grep(s/.+(\-\S+)$/$1/e,@idxlist);
-
-safesystem("mkdir -p $tmpdir") or die;
-
-preparing_script();
-
-#launching process through the queue
-my @sgepids =();
-
-my $failure=0;
-foreach my $idx (@idxlist){
- print STDERR "qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash\n" if $dbg;
-
- $cmd="qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log";
-
- safesystem($cmd) or die;
-
- my ($res,$id);
-
- open (IN,"${jobscript}${idx}.log");
- chomp($res=<IN>);
-  my @fields = split(/\s+/,$res);
-  $id=$fields[2];
- close(IN);
-
- push @sgepids, $id;
-}
-
-#waiting until all jobs have finished
-my $hj = "-hold_jid " . join(" -hold_jid ", @sgepids);
-
-$cmd="qsub $queueparameters -sync yes $hj -j yes -o /dev/null -e /dev/null -N $qsubname.W -b yes /bin/ls >& $qsubname.W.log";
-safesystem($cmd) or kill_all_and_quit();
-
-$failure=&check_exit_status();
-
-kill_all_and_quit() if $failure;
-
-check_translation();
-
-#concatenating translations and removing temporary files
-concatenate_1best();
-if ($nbestflag){ concatenate_nbest(); }
-
-remove_temporary_files();
-
-
-#script creation
-sub preparing_script(){
- foreach my $idx (@idxlist){
- my $scriptheader="\#\! /bin/bash\n\n";
- $scriptheader.="uname -a\n\n";
- $scriptheader.="cd $workingdir\n\n";
-
- open (OUT, "> ${jobscript}${idx}.bash");
- print OUT $scriptheader;
- if ($nbestflag){
- print OUT "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -i ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n";
- print OUT "echo exit status \$\?\n\n";
- print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n";
- print OUT "echo exit status \$\?\n\n";
- }else{
- print OUT "$mosescmd $mosesparameters -i ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n";
- }
- print OUT "mv $tmpdir/${testfile}.$splitpfx$idx.trans .\n\n";
- print OUT "echo exit status \$\?\n\n";
- close(OUT);
- }
-}
-
-
-
-sub concatenate_nbest(){
- my $oldcode="";
- my $newcode=-1;
- open (OUT, "> ${orinbestfile}");
- foreach my $idx (@idxlist){
- open (IN, "${nbestfile}.${splitpfx}${idx}");
- while (<IN>){
- my ($code,@extra)=split(/\|\|\|/,$_);
- $newcode++ if $code ne $oldcode;
- $oldcode=$code;
- print OUT join("\|\|\|",($newcode,@extra));
- }
- close(IN);
- $oldcode="";
- }
- close(OUT);
-}
-
-sub concatenate_1best(){
- foreach my $idx (@idxlist){
- my @in=();
- open (IN, "${testfile}.${splitpfx}${idx}.trans");
- @in=<IN>;
- print STDOUT "@in";
- close(IN);
- }
-}
-
-sub check_exit_status(){
- print STDERR "check_exit_status\n";
- my $failure=0;
- foreach my $idx (@idxlist){
- print STDERR "check_exit_status of job $idx\n";
- open(IN,"$qsubout$idx");
- while (<IN>){
- $failure=1 if (/exit status 1/);
- }
- close(IN);
- }
- return $failure;
-}
-
-sub kill_all_and_quit(){
- print STDERR "Got interrupt or something failed.\n";
- print STDERR "kill_all_and_quit\n";
- foreach my $id (@sgepids){
- print STDERR "qdel $id\n";
- safesystem("qdel $id");
- }
-
- print STDERR "Translation was not performed correctly\n";
-  print STDERR "At least one of the submitted jobs did not terminate correctly\n";
-  print STDERR "qdel was called for all submitted jobs\n";
-
- exit(1);
-}
-
-
-sub check_translation(){
- #checking if all sentences were translated
- my $inputN;
- my $outputN;
- foreach my $idx (@idxlist){
- if ($inputtype==0){#text input
- chomp($inputN=`wc -l ${testfile}.$splitpfx$idx | cut -d' ' -f1`);
- }
- else{
- chomp($inputN=`cat ${testfile}.$splitpfx$idx | perl -pe 's/\\n/ _CNendline_ /g;' | perl -pe 's/_CNendline_ _CNendline_ /_CNendline_\\n/g;' | wc -l | cut -d' ' -f1 `);
- }
- chomp($outputN=`wc -l ${testfile}.$splitpfx$idx.trans | cut -d' ' -f1`);
-
- if ($inputN != $outputN){
-      print STDERR "Split ($idx) was not entirely translated\n";
- print STDERR "outputN=$outputN inputN=$inputN\n";
- print STDERR "outputfile=${testfile}.$splitpfx$idx.trans inputfile=${testfile}.$splitpfx$idx\n";
- exit(1);
- }
- }
-}
-
-sub remove_temporary_files(){
- #removing temporary files
- foreach my $idx (@idxlist){
- unlink("${testfile}.${splitpfx}${idx}.trans");
- unlink("${testfile}.${splitpfx}${idx}");
- if ($nbestflag){ unlink("${nbestfile}.${splitpfx}${idx}"); }
- unlink("${jobscript}${idx}.bash");
- unlink("${jobscript}${idx}.log");
- unlink("$qsubname.W.log");
- unlink("$qsubout$idx");
- unlink("$qsuberr$idx");
- rmdir("$tmpdir");
- }
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
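-  # $? encodes the child status: -1 means the command could not be started,
-  # the low 7 bits ($? & 127) give the terminating signal, bit 8 ($? & 128)
-  # flags a core dump, and the high byte ($? >> 8) is the exit code.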
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
diff --git a/scripts/generic/multi-bleu.perl b/scripts/generic/multi-bleu.perl
deleted file mode 100755
index 9f00e349f..000000000
--- a/scripts/generic/multi-bleu.perl
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-my $stem = $ARGV[0];
-if (!defined $stem) {
- print STDERR "usage: multi-bleu.pl reference.filestem < hypothesis\n";
- print STDERR "Reads the references from reference.filestem.0, reference.filestem.1, ...\n";
- exit(1);
-}
-
-my @REF;
-my $ref=0;
-while(-e "$stem$ref") {
- &add_to_ref("$stem$ref",\@REF);
- $ref++;
-}
-&add_to_ref($stem,\@REF) if -e $stem;
-
-die "No reference sentences found!" if 0 == scalar @REF;
-
-sub add_to_ref {
- my ($file,$REF) = @_;
- my $s=0;
- open(REF,$file) or die "Can't read $file";
- while(<REF>) {
- chop;
- push @{$$REF[$s++]}, $_;
- }
- close(REF);
-}
-
-my(@CORRECT,@TOTAL,$length_translation,$length_reference);
-my $s=0;
-while(<STDIN>) {
- chop;
- my @WORD = split;
- my %REF_NGRAM = ();
- my $length_translation_this_sentence = scalar(@WORD);
- my ($closest_diff,$closest_length) = (9999,9999);
- foreach my $reference (@{$REF[$s]}) {
-# print "$s $_ <=> $reference\n";
- my @WORD = split(/ /,$reference);
- my $length = scalar(@WORD);
- if (abs($length_translation_this_sentence-$length) < $closest_diff) {
- $closest_diff = abs($length_translation_this_sentence-$length);
- $closest_length = $length;
-# print "$i: closest diff = abs($length_translation_this_sentence-$length)<BR>\n";
- }
- for(my $n=1;$n<=4;$n++) {
- my %REF_NGRAM_N = ();
- for(my $start=0;$start<=$#WORD-($n-1);$start++) {
- my $ngram = "$n";
- for(my $w=0;$w<$n;$w++) {
- $ngram .= " ".$WORD[$start+$w];
- }
- $REF_NGRAM_N{$ngram}++;
- }
- foreach my $ngram (keys %REF_NGRAM_N) {
- if (!defined($REF_NGRAM{$ngram}) ||
- $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
- $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
-# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
- }
- }
- }
- }
- $length_translation += $length_translation_this_sentence;
- $length_reference += $closest_length;
- for(my $n=1;$n<=4;$n++) {
- my %T_NGRAM = ();
- for(my $start=0;$start<=$#WORD-($n-1);$start++) {
- my $ngram = "$n";
- for(my $w=0;$w<$n;$w++) {
- $ngram .= " ".$WORD[$start+$w];
- }
- $T_NGRAM{$ngram}++;
- }
- foreach my $ngram (keys %T_NGRAM) {
- $ngram =~ /^(\d+) /;
- my $n = $1;
-# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
- $TOTAL[$n] += $T_NGRAM{$ngram};
- if (defined($REF_NGRAM{$ngram})) {
- if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
- $CORRECT[$n] += $T_NGRAM{$ngram};
-# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
- }
- else {
- $CORRECT[$n] += $REF_NGRAM{$ngram};
-# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
- }
- }
- }
- }
- $s++;
-}
-my $brevity_penalty = 1;
-if ($length_translation<$length_reference) {
- $brevity_penalty = exp(1-$length_reference/$length_translation);
-}
-my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
- my_log( $CORRECT[2]/$TOTAL[2] ) +
- my_log( $CORRECT[3]/$TOTAL[3] ) +
- my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
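-# Worked example (numbers are illustrative): with n-gram precisions 0.6/0.4/0.3/0.2,
-# a translation length of 95 and a reference length of 100, the brevity penalty is
-# exp(1 - 100/95) ~= 0.949, so BLEU ~= 0.949 * exp((log 0.6 + log 0.4 + log 0.3 + log 0.2)/4)
-# ~= 0.949 * 0.346 ~= 0.33, printed below as roughly 32.9 after scaling by 100.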
-
-printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, %i sents, %i refs)\n",
- 100*$bleu,
- 100*$CORRECT[1]/$TOTAL[1],
- 100*$CORRECT[2]/$TOTAL[2],
- 100*$CORRECT[3]/$TOTAL[3],
- 100*$CORRECT[4]/$TOTAL[4],
- $brevity_penalty,
- $length_translation / $length_reference,
- scalar @REF,
- scalar @{$REF[0]};
-
-sub my_log {
- return -9999999999 unless $_[0];
- return log($_[0]);
-}
diff --git a/scripts/generic/qsub-wrapper.pl b/scripts/generic/qsub-wrapper.pl
deleted file mode 100755
index fbeb72790..000000000
--- a/scripts/generic/qsub-wrapper.pl
+++ /dev/null
@@ -1,200 +0,0 @@
-#! /usr/bin/perl
-
-use strict;
-
-#######################
-#Default parameters
-#parameters for submitting processes through SGE
-#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
-my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G";
-
-my $workingdir=$ENV{PWD};
-my $tmpdir="$workingdir/tmp$$";
-my $jobscript="$workingdir/job$$";
-my $qsubout="$workingdir/out.job$$";
-my $qsuberr="$workingdir/err.job$$";
-
-
-$SIG{INT} = \&kill_all_and_quit; # catch CTRL-C (SIGINT)
-
-my $help="";
-my $dbg="";
-my $version="";
-my $qsubname="WR$$";
-my $cmd="";
-my $cmdout="";
-my $cmderr="";
-my $parameters="";
-
-sub init(){
- use Getopt::Long qw(:config pass_through);
- GetOptions('version'=>\$version,
- 'help'=>\$help,
- 'debug'=>\$dbg,
- 'qsub-prefix=s'=> \$qsubname,
- 'command=s'=> \$cmd,
- 'stdout=s'=> \$cmdout,
- 'stderr=s'=> \$cmderr,
-            'queue-parameters=s'=> \$queueparameters
- ) or exit(1);
- $parameters="@ARGV";
-
- version() if $version;
- usage() if $help;
- print_parameters() if $dbg;
-}
-
-#######################
-##print version
-sub version(){
-# print STDERR "version 1.0 (29-07-2006)\n";
- print STDERR "version 1.1 (31-07-2006)\n";
- exit(1);
-}
-
-#usage
-sub usage(){
- print STDERR "qsub-wrapper.pl [options]\n";
- print STDERR "Options:\n";
- print STDERR "-command <file> command to run\n";
- print STDERR "-stdout <file> file to save stdout of cmd (optional)\n";
- print STDERR "-stderr <file> file to save stderr of cmd (optional)\n";
-  print STDERR "-qsub-prefix <string> name for submitted jobs (optional)\n";
- print STDERR "-queue-parameters <string> parameter for the queue (optional)\n";
- print STDERR "-debug debug\n";
- print STDERR "-version print version of the script\n";
- print STDERR "-help this help\n";
- exit(1);
-}
-
-#printparameters
-sub print_parameters(){
- print STDERR "command: $cmd\n";
- print STDERR "file for stdout: $cmdout\n";
- print STDERR "file for stderr: $cmderr\n";
- print STDERR "Qsub name: $qsubname\n";
- print STDERR "Queue parameters: $queueparameters\n";
- print STDERR "parameters directly passed to cmd: $parameters\n";
- exit(1);
-}
-
-#script creation
-sub preparing_script(){
- my $scriptheader="\#\! /bin/bash\n\n";
- $scriptheader.="uname -a\n\n";
-
- $scriptheader.="cd $workingdir\n\n";
-
- open (OUT, "> ${jobscript}.bash");
- print OUT $scriptheader;
-
- print OUT "($cmd $parameters > $tmpdir/cmdout$$) 2> $tmpdir/cmderr$$\n\n";
- print OUT "echo exit status \$\?\n\n";
-
- if ($cmdout){
- print OUT "mv $tmpdir/cmdout$$ $cmdout\n\n";
- print OUT "echo exit status \$\?\n\n";
- }
- else{
- print OUT "rm $tmpdir/cmdout$$\n\n";
- print OUT "echo exit status \$\?\n\n";
- }
-
- if ($cmderr){
- print OUT "mv $tmpdir/cmderr$$ $cmderr\n\n";
- print OUT "echo exit status \$\?\n\n";
- }
- else{
- print OUT "rm $tmpdir/cmderr$$\n\n";
- print OUT "echo exit status \$\?\n\n";
- }
-
-
- close(OUT);
-}
-
-#######################
-#Script starts here
-
-init();
-
-usage() if $cmd eq "";
-
-safesystem("mkdir -p $tmpdir") or die;
-
-preparing_script();
-
-my $qsubcmd="qsub $queueparameters -sync yes -o $qsubout -e $qsuberr -N $qsubname ${jobscript}.bash >& ${jobscript}.log";
-
-print STDERR "$qsubcmd\n";
-safesystem($qsubcmd) or die;
-
-#getting id of submitted job
-my $res;
-open (IN,"${jobscript}.log");
-chomp($res=<IN>);
-my @fields = split(/\s+/,$res);
-my $id=$fields[2];
-close(IN);
-
-print STDERR " res:$res\n";
-print STDERR " id:$id\n";
-
-my $failure=&check_exit_status();
-
-&kill_all_and_quit() if $failure;
-
-&remove_temporary_files();
-
-sub check_exit_status(){
- my $failure=0;
-
- print STDERR "check_exit_status of submitted job $id\n";
- open(IN,"$qsubout");
- while (<IN>){
- $failure=1 if (/exit status 1/);
- }
- close(IN);
- return $failure;
-}
-
-sub kill_all_and_quit(){
- print STDERR "kill_all_and_quit\n";
- print STDERR "qdel $id\n";
- safesystem("qdel $id");
-
-  print STDERR "The submitted job did not terminate correctly\n";
-  print STDERR "A qdel signal was sent to the submitted job\n";
-
- exit(1);
-}
-
-sub remove_temporary_files(){
- #removing temporary files
-
- unlink("${jobscript}.bash");
- unlink("${jobscript}.log");
- unlink("$qsubout");
- unlink("$qsuberr");
- rmdir("$tmpdir");
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
diff --git a/scripts/released-files b/scripts/released-files
deleted file mode 100644
index b35391e90..000000000
--- a/scripts/released-files
+++ /dev/null
@@ -1,37 +0,0 @@
-analysis/README
-analysis/sentence-by-sentence.pl
-generic/extract-factors.pl
-generic/lopar2pos.pl
-generic/moses-parallel.pl
-generic/multi-bleu.perl
-generic/qsub-wrapper.pl
-Makefile
-README
-released-files
-training/absolutize_moses_model.pl
-training/build-generation-table.perl
-training/clean-corpus-n.perl
-training/clone_moses_model.pl
-training/cmert-0.5/bleu.py
-training/cmert-0.5/dataset.py
-training/cmert-0.5/log.py
-training/cmert-0.5/mert
-training/cmert-0.5/python/psyco/classes.py
-training/cmert-0.5/python/psyco/core.py
-training/cmert-0.5/python/psyco/__init__.py
-training/cmert-0.5/python/psyco/kdictproxy.py
-training/cmert-0.5/python/psyco/logger.py
-training/cmert-0.5/python/psyco/profiler.py
-training/cmert-0.5/python/psyco/_psyco.so
-training/cmert-0.5/python/psyco/support.py
-training/cmert-0.5/README
-training/cmert-0.5/score-nbest.py
-training/filter-model-given-input.pl
-training/mert-moses.pl
-training/phrase-extract/extract
-training/phrase-extract/score
-training/postprocess-lopar.perl
-training/reduce_combine.pl
-training/train-factored-phrase-model.perl
-training/symal/symal
-training/symal/giza2bal.pl
diff --git a/scripts/tests/README b/scripts/tests/README
deleted file mode 100644
index 2d1db6908..000000000
--- a/scripts/tests/README
+++ /dev/null
@@ -1,28 +0,0 @@
-==== Running the tests:
-
-All *.test files here are run by ../Makefile.
-Please do not run them from here, as they depend on variables set by the ../Makefile
-
-To run all tests, issue:
- make tests
- !! in the scripts directory
-
-To run a single test, issue:
- make tests/TESTNAME.test.run
- !! in the scripts directory
-
- Example:
- make tests/train-factored-test-step3.test.run
-
-
-
-==== Building new tests
-
-Each test should assume that it is launched in a fresh directory:
-
-tests/TESTNAME.test.TIMESTAMP/
-
-The test should prepare any files it needs and then test what there is to test.
-
-The best way is to make a copy of an older test, rename it, modify it and add it to CVS.
-
diff --git a/scripts/tests/epps-sample/epps.en b/scripts/tests/epps-sample/epps.en
deleted file mode 100644
index 4a8a191d9..000000000
--- a/scripts/tests/epps-sample/epps.en
+++ /dev/null
@@ -1,100 +0,0 @@
-<s> i declare resumed the session of the european parliament adjourned on thursday , 28 march 1996 . </s>
-<s> ladies and gentlemen , on behalf of the house let me welcome a delegation from the grand committee of the finnish parliament , i.e. , the european affairs committee of the finnish parliament , led by its chairman , mr erkki tuomioja . i bid you a warm welcome ! </s>
-<s> we are pleased at this visit , which reflects the increasingly close cooperation between us and the national parliaments in the union , and i wish our finnish colleagues a pleasant stay in strasbourg and , of course , useful and interesting discussions in this house ! </s>
-<s> the minutes of the sitting of thursday , 28 march 1996 have been distributed . </s>
-<s> are there any comments ? </s>
-<s> mr president , on behalf of my fellow-members from the committee on agriculture i should like to ask you to change a few things in the voting about the bse resolution . obviously one or two things have gone wrong . </s>
-<s> points 16 and 17 now contradict one another whereas the voting showed otherwise . </s>
-<s> i shall be passing on to you some comments which you could perhaps take up with regard to the voting . </s>
-<s> i will have to look into that , mrs oomen-ruijten . </s>
-<s> i cannot say anything at this stage . </s>
-<s> we will consider the matter . </s>
-<s> mr president , it concerns the speech made last week by mr fischler on bse and reported in the minutes . </s>
-<s> perhaps the commission or you could clarify a point for me . it would appear that a speech made at the weekend by mr fischler indicates a change of his position . </s>
-<s> i welcome this change because he has said that he will eat british beef and that the ban was imposed specifically for economic and political reasons . </s>
-<s> could somebody clarify that he has actually said this please , mr president , because it is a change of views . </s>
-<s> mr sturdy , i cannot see what that has to do with the minutes . </s>
-<s> mr president , on exactly the same point as mr sturdy has raised . </s>
-<s> if commission fischler has made this statement , then he has said that it is not a matter of public health . </s>
-<s> if it is not a matter of public health , he has no legal base , if he has no legal base then he may very well face legal action from the people his statement has damaged . </s>
-<s> mr smith , the same applies to your point ; it has nothing to do with the minutes . </s>
-<s> mr president , again on bse . if mr fischler has changed his views does this mean the commission has ? </s>
-<s> this was actually referred to in the minutes . </s>
-<s> he said that any funding would also go towards supporting workers who had directly lost their jobs . </s>
-<s> so far nothing has come of this in any scheme that has been put forward . </s>
-<s> does this mean the commission has changed its view on supporting workers who have lost their jobs through this scare ? </s>
-<s> i note that there are no comments on the minutes . </s>
-<s> ( parliament approved the minutes ) </s>
-<s> mr president , since our last meeting in strasbourg there was a report from the news agency afp on 26 march that thirteen irregularly employed workers were identified in a police blitz on the building site of our new hemicycle . </s>
-<s> eight of these workers were undeclared and five others were citizens of one member state employed by a building contractor in another member state who were drawing unemployment benefit in france . </s>
-<s> now that is clearly a fraud on the french taxpayer as well as a fraud on the european citizens . </s>
-<s> can we ask the bureau to look into this fact . </s>
-<s> when the european parliament , that so frequently insists on workers' rights and proper social protection , is building a hemicycle , it should make sure that the contracts concerning those employed in the buildings should provide for full social protection and full payment . </s>
-<s> can we ask you as president to make sure that is happening with the taxpayers' money for which we are collectively responsible . </s>
-<s> mr tomlinson , would you please give me the documents concerned . </s>
-<s> then i will look into the matter , both in the presidency and with the other organizations in question . </s>
-<s> you yourself know best how difficult and complicated the legal situation is between parliament and the firms contracted to build the hemicycle . </s>
-<s> after all , we have in you an expert who is in any case closely concerned with these matters . </s>
-<s> mr president , i should like to deal very briefly with two matters . </s>
-<s> first , since we condemned at the time eta's terrorist murders and kidnappings , today we can give you the news that the hostage who beats all records in spain - almost a year's captivity , 342 days , - has been freed , not , it is true , by the forces of public order , but he is at liberty . </s>
-<s> and i should be very pleased - and so , i think , would all members , particularly the spanish members and perhaps still more those who are distinguished by being spanish basques - if you would send a message to the hostage's family to congratulate them on his freedom . </s>
-<s> secondly i should like to point out , before the agenda is settled , that the importance of the bombardment of lebanese territory by the state of israel will make it necessary to ask for a statement from the council . </s>
-<s> these indiscriminate and unfair bombardments not only infringe human rights but may also pose a threat to the independence and integrity of the state of lebanon , which is guaranteed by international law and by resolution 245 of the united nations security council . </s>
-<s> thank you ! </s>
-<s> the next item is the order of business . </s>
-<s> the final version of the draft agenda as drawn up by the conference of presidents at its meeting of 11 april 1996 pursuant to rule 95 of the rules of procedure has been distributed . no amendments have been proposed . </s>
-<s> mr president , i should like to ask a question . </s>
-<s> we were too late to hand it in , but it has been discussed with a number of members . </s>
-<s> it is the following . </s>
-<s> i think that in view of present events in the middle east , we ought to ask whether the council can make a statement on wednesday afternoon about the way things are going . we note that the situation is changing every day . </s>
-<s> we see that the french government has sent a mediator . </s>
-<s> we hear nothing from the presidency . </s>
-<s> nor have we heard anything , unfortunately , from the commission . </s>
-<s> that is reason enough for us to be put in the picture . </s>
-<s> i should like to ask you to see whether it is possible for the italian presidency to do that on wednesday afternoon . </s>
-<s> we recognize that the agenda is very full . </s>
-<s> i might suggest that , at least as far as the epp group is concerned , we could perhaps take this question up on wednesday afternoon and then omit the chernobyl question , otherwise i think we shall not get through . </s>
-<s> then we could deal with chernobyl some time later . </s>
-<s> i offer that as a suggestion , mr president . </s>
-<s> if you would like to arrange things some other way so that we do not need to take chernobyl off the agenda that would suit us just as well . </s>
-<s> mr president , my group is certainly of the view that what is happening in the middle east is very significant and that we cannot let this week with parliament in full session pass without any response or discussion . </s>
-<s> could we leave it to you , mr president , to see whether the council is prepared to make a statement on the middle east and what is happening in the lebanon ? </s>
-<s> could we then look to you to see what is possible . </s>
-<s> i am reluctant , as i know many of my colleagues are , to remove chernobyl from the agenda , as suggested by mrs oomen-ruijten . </s>
-<s> however , it would be possible for us to come back to the issue of chernobyl which does not have such immediacy although it is the tenth anniversary . </s>
-<s> we could take this in two weeks' time , whereas the issue of lebanon is crucial at this moment . </s>
-<s> perhaps , mr president , we could ask you to look at this with the council and see what would be possible . </s>
-<s> mr president , i am glad to support mrs oomen-ruijten's suggestion , supported by mrs green - with one slight reservation . </s>
-<s> i think that as a matter of courtesy we should naturally ask the council whether it is ready to make a statement . </s>
-<s> but politically i am not concerned about a question of courtesy ; i am concerned about asking the council presidency to come here to explain why it not playing any part in this crisis either . </s>
-<s> i understand the french government's motives for trying to mediate . </s>
-<s> i hope the council has taken the decision that mr hervé de charette should try to mediate on behalf of the union , but all the indications are that it has not and that the french government is working independently here . </s>
-<s> that is no problem for me ; i think it is a good initiative , but again europe is absent . </s>
-<s> that was previously the case in the crisis between turkey and greece , and then mr holbrooke rightly said : ' europe is sleeping through the night' . </s>
-<s> it should not happen again , mr president . </s>
-<s> my group wants the italian presidency to come here and explain what its role is . </s>
-<s> mr president , ladies and gentlemen , i think it is important that we should discuss the situation in the middle east this week . </s>
-<s> we all agree on that . </s>
-<s> but i think it is equally important that we discuss chernobyl this week . </s>
-<s> i cannot support this proposal . </s>
-<s> the whole world is talking about the tenth anniversary . </s>
-<s> a major conference was held in vienna . </s>
-<s> there was a tribunal . </s>
-<s> and then we say we will postpone it to whenever . </s>
-<s> i really think it would show parliament in a poor light if we do not discuss it on wednesday . </s>
-<s> we must not turn it into a competition between chernobyl and the proposal to ask the council to report on the situation in the middle east this week and to debate on it , and then put chernobyl in second place ! </s>
-<s> there are various other options . </s>
-<s> we can listen to the council on the subject on wednesday morning , but please not just in a week's time . </s>
-<s> at a time when the whole world is discussing chernobyl and the effects of that disaster , we in this parliament cannot say , we shall hold back and say nothing about it ! </s>
-<s> mr president , i have already given you the reasons why the council should make a statement about the bombardment of lebanon . </s>
-<s> in discussing the euro-mediterranean conference , to which we gave the greatest prominence , we cannot then wash our hands of it and leave it to the european powers to take diplomatic steps in the middle east outside the necessary agreement within the union . </s>
-<s> that would be a bad sign for the intergovernmental conference and for the future of the european union . </s>
-<s> but i think , like mrs roth , that we cannot make the subject of chernobyl and the subject of the middle east compete in the 'free market for news items' . </s>
-<s> we must seek out the opportunity to discuss both subjects . </s>
-<s> mr president , like previous colleagues , i believe it to be absolutely essential that the council make a statement on wednesday about the middle east situation , particularly in southern lebanon . </s>
-<s> what is happening there is worrying and distressing because the whole peace process that has been under way for several months could be threatened . </s>
-<s> and the most important task is to stop the fighting and give priority to diplomatic moves , wherever they come from , provided they finally lead to the silencing of the rockets and guns and some real progress towards peace and stability in the region . </s>
-<s> we should also remember lebanon itself , which has been ravaged for many years , under the domination of one foreign power while part of its territory is occupied by another . </s>
-<s> so it is surely our duty to make a statement on the subject : we are witnessing events which , i am convinced , should persuade us to find a slot in wednesday's agenda for such a statement and the subsequent debate . </s>
-<s> ladies and gentlemen , this is one of the cases where there is no satisfactory solution because someone is bound to be disappointed and any decision is bound to be to the detriment of someone's interests . </s>
-<s> let me describe the situation to you as i see it . </s>
diff --git a/scripts/tests/epps-sample/epps.es b/scripts/tests/epps-sample/epps.es
deleted file mode 100644
index 46ced1739..000000000
--- a/scripts/tests/epps-sample/epps.es
+++ /dev/null
@@ -1,100 +0,0 @@
-<s> declaro reanudado el período de sesiones del parlamento europeo , interrumpido el 28 de marzo de 1996 . </s>
-<s> deseo dar la bienvenida a los miembros de una delegación de la " gran comisión " , es decir , la comisión de asuntos europeos , del parlamento finlandés , dirigida por su presidente , el sr. erkki tuomioja , delegación que acaba de llegar a la tribuna de invitados . </s>
-<s> nos alegramos de esta visita , que se enmarca en la cooperación cada vez más estrecha entre nosotros y los parlamentos nacionales de la unión . deseo que nuestros colegas finlandeses tengan una agradable estancia en estrasburgo y también , naturalmente , que tengamos ocasión de hablar en esta asamblea de manera provechosa e interesante . </s>
-<s> el acta de la sesión del jueves 28 de marzo de 1996 ha sido distribuida . </s>
-<s> ¿ hay alguna observación ? </s>
-<s> señor presidente , en nombre de los miembros de la comisión de agricultura quisiera rogarle que se introdujeran algunas correcciones en la resolución sobre la eeb . por lo visto se han cometido algunos errores . </s>
-<s> los puntos 16 y 17 se contradicen , mientras que la votación había reflejado otra cosa . </s>
-<s> le entregaré las observaciones realizadas al respecto . quizás pueda incluirlas en la votación . </s>
-<s> primero debo aclararlo , señora oomen-ruijten . </s>
-<s> así , de momento , no puedo pronunciarme . </s>
-<s> deberemos examinar la cuestión . </s>
-<s> señor presidente , me referiré al discurso que el sr. fischler pronunció la semana pasada en relación con la encefalopatía espongiforme bovina , que se reseñó en el acta . </s>
-<s> quizá la comisión o usted mismo pudieran aclararme una cuestión : por lo visto el sr. fischler pronunció un discurso este fin de semana en el que parecía haber cambiado de actitud . </s>
-<s> me alegra que así sea , pues el sr. fischler dijo que iba a comer carne de bovino británico y que la prohibición se debía concretamente a motivos económicos y políticos . </s>
-<s> agradecería que alguien me pudiera confirmar que eso es lo que ha dicho el sr. fischler , pues representa un cambio de actitud . </s>
-<s> señoría , no consigo ver qué tiene que ver eso con el acta . </s>
-<s> señor presidente , me referiré exactamente a la misma cuestión que el sr. sturdy . </s>
-<s> si el comisario fischler ha hecho esta declaración , entonces lo que ha dicho es que no se trata de una cuestión de salud pública . </s>
-<s> si no se trata de una cuestión de salud pública , entonces el sr. fischler carece de base jurídica , y si carece de base jurídica entonces es muy posible que le lleven ante los tribunales las personas que ha dañado con su declaración . </s>
-<s> señor smith , le digo lo mismo , que eso no tiene nada que ver con el acta . </s>
-<s> señor presidente , yo también voy a referirme a la eeb . si el sr. fischler ha cambiado de opinión , ¿ quiere decir eso que también la comisión ha cambiado de opinión ? </s>
-<s> la cuestión se mencionó en el acta . </s>
-<s> el sr. fischler dijo que los fondos que se destinasen a gran bretaña serían también para apoyar a los trabajadores que han perdido directamente sus puestos de trabajo . </s>
-<s> hasta ahora no se ha recibido ningún fondo proveniente de ese plan . </s>
-<s> ¿ quiere decir eso que la comisión ha cambiado de parecer y no va a apoyar a los trabajadores que han perdido sus puestos de trabajo como consecuencia del pánico desencadenado ? </s>
-<s> constato que no hay ninguna observación al acta . </s>
-<s> ( el acta queda aprobada ) </s>
-<s> señor presidente , después de nuestra última reunión en estrasburgo , el 26 de marzo hubo una noticia de la agencia afp según la cual 13 empleados en situación irregular fueron identificados en una incursión efectuada por la policía en las obras de edificación de nuestro nuevo hemiciclo . </s>
-<s> ocho de esos trabajadores no habían sido declarados y otros cinco eran ciudadanos de un estado miembro empleados por un aparejador de obras en otro estado miembro , que percibían un subsidio de desempleo en francia . </s>
-<s> es evidente que esto constituye un fraude para los contribuyentes franceses y también para los ciudadanos europeos . </s>
-<s> ¿ podemos pedir a la mesa que se ocupe de esta cuestión ? </s>
-<s> cuando el parlamento europeo , que tan frecuentemente insiste en los derechos de los trabajadores y en la debida protección social , está construyendo un hemiciclo , debería cerciorarse de que los contratos de las personas que trabajan en las obras prevén la debida remuneración y la plena protección social . </s>
-<s> ¿ podemos pedirle a usted , como presidente , que averigüe si eso está sucediendo con el dinero de los contribuyentes del que todos somos responsables a título colectivo ? </s>
-<s> señoría , déme esos documentos , por favor . </s>
-<s> examinaré el asunto tanto con la mesa como con todas las organizaciones afectadas . </s>
-<s> usted sabe perfectamente lo difícil y complicada que es la situación jurídica entre el parlamento y las empresas que tienen la contrata de la construcción del edificio . </s>
-<s> en cualquier caso , en usted tenemos un experto que se ocupa con rigor de estas cuestiones . </s>
-<s> señor presidente , quiero tratar muy brevemente de dos cuestiones . </s>
-<s> en primer lugar , y puesto que hemos denunciado en su día los atentados terroristas y los secuestros de eta , hoy podemos darles la noticia de que el secuestrado que bate todos los récords en españa -casi un año de secuestro , 342 días - ha sido liberado . cierto que no por las fuerzas de orden público , pero está en libertad . </s>
-<s> y a mí me gustaría -y creo que a todos los diputados , especialmente los diputados españoles , y quizá más todavía los que tienen la singularidad de ser españoles vascos - , que usted se dirigiera a la familia del secuestrado para felicitarle por su puesta en libertad . </s>
-<s> en segundo lugar , quiero hacerle notar , antes de la fijación del orden del día , que la importancia que tienen los bombardeos del estado de israel en territorio libanés exigiría pedir una declaración del consejo . </s>
-<s> con esos bombardeos indiscriminados e injustos no solamente se conculcan los derechos humanos , sino que puede ponerse también en peligro la independencia y la integridad del estado del líbano , que está garantizada por el derecho internacional y por la resolución 245 del consejo de seguridad de las naciones unidas . </s>
-<s> muchas gracias . </s>
-<s> procedemos a continuación a la fijación del orden de los trabajos . </s>
-<s> se ha distribuido el proyecto definitivo de orden del día establecido , de conformidad con el artículo 95 del reglamento , por la conferencia de presidentes en la reunión del 11 de abril de 1996 , al cual no se han propuesto las siguientes modificaciones . </s>
-<s> señor presidente , quisiera plantear una pregunta . </s>
-<s> hemos llegado demasiado tarde para presentarla , pero ya hemos hablado de ello con varios diputados . </s>
-<s> se trata de lo siguiente . </s>
-<s> debido a los acontecimientos que están teniendo lugar en estos momentos en oriente medio , creo que deberíamos preguntar al consejo si el miércoles por la tarde no puede emitir una declaración sobre la situación . </s>
-<s> constatamos que la situación cambia día a día . </s>
-<s> vemos que el gobierno francés ha enviado a un mediador . </s>
-<s> por desgracia , todavía no hemos oído nada de la comisión europea . </s>
-<s> razón de más para ponernos al día . </s>
-<s> pediría que se considerara la posibilidad de que lo hiciera la presidencia italiana el miércoles por la tarde . </s>
-<s> somos conscientes de que el orden del día es muy apretado . </s>
-<s> y podría decir como sugerencia que nosotros , por lo menos el grupo del ppe , estaríamos dispuestos a incluir esta cuestión el miércoles por la tarde y suprimir la cuestión de chernóbil , porque creo que de otro modo no nos dará tiempo de acabar . </s>
-<s> en tal caso podríamos tratar chernóbil un poco más tarde . </s>
-<s> es una sugerencia , señor presidente , que le hago a usted . </s>
-<s> si cree usted poder solucionarlo de otro modo , sin necesidad de suprimir la cuestión de chernóbil del orden del día , mejor que mejor . </s>
-<s> señor presidente , no hace falta decir que mi grupo opina que lo que está sucediendo en el oriente medio es importantísimo y que no podemos dejar pasar esta semana , en la que el parlamento está reunido , sin manifestar nuestra reacción o discutir la cuestión . </s>
-<s> señor presidente , ¿ podría encargarse de averiguar si el consejo está dispuesto a hacer una declaración sobre el oriente medio y sobre lo que está sucediendo en el líbano ? </s>
-<s> ¿ podemos confiar en que averigüe usted lo que se pueda hacer ? </s>
-<s> me cuesta mucho trabajo , como a muchos de mis colegas , acceder a que desaparezca chernóbil del orden del día , según ha sugerido la sra. oomen-ruijten . </s>
-<s> ahora bien , ¿ no podríamos ocuparnos más adelante de la cuestión de chernóbil , que no es de una urgencia tan inmediata a pesar de que se trata de su décimo aniversario ? </s>
-<s> podríamos ocuparnos de la cuestión dentro de dos semanas , mientras que la cuestión del líbano reviste importancia capital en estos precisos momentos . </s>
-<s> señor presidente , quizá pudiera discutir usted esta cuestión con el consejo y ver lo que cabe hacer . </s>
-<s> señor presidente , me sumo a la sugerencia de la sra. oomen-ruijten , tal como la ha apoyado la sra. green , es decir , con una observación . </s>
-<s> pienso que por educación hemos de preguntar al consejo si está dispuesto a hacer una declaración . </s>
-<s> pero , en lo que respecta a la política , no me interesan las preguntas educadas ; lo que me interesa es que la presidencia del consejo venga aquí a explicar por qué en esta crisis también está del todo ausente . </s>
-<s> comprendo las razones del gobierno francés para intentar mediar . </s>
-<s> espero que el consejo haya decidido que el sr. hervé de charette intente mediar en nombre de la unión , pero todo parece indicar que ocurre lo contrario , que el gobierno francés actúa por su lado en esta cuestión . </s>
-<s> eso me parece bien ; me parece una buena iniciativa , pero una vez más , europa está ausente . </s>
-<s> este ya fue el caso en la crisis entre turquía y grecia , y entonces el sr. holbrooke dijo con razón : europe is sleeping through the night . </s>
-<s> que no vuelva a repetirse , señor presidente . </s>
-<s> mi grupo exige que la presidencia italiana haga acto de presencia y explique cuál es su papel . </s>
-<s> señor presidente , señorías , me parece importante que hablemos esta semana de la situación en oriente medio . </s>
-<s> en esto estamos todos de acuerdo . </s>
-<s> pero me parece igualmente importante y necesario que hablemos esta semana de chernóbil . </s>
-<s> no puedo entender esta propuesta . </s>
-<s> el mundo entero habla del décimo aniversario . </s>
-<s> se ha celebrado en viena una gran conferencia . </s>
-<s> hubo un tribunal . </s>
-<s> y nosotros decimos que aplazamos la cuestión hasta una fecha no determinada . </s>
-<s> verdaderamente , considero que no tratar el tema el miércoles es un testimonio de pobreza por parte del parlamento . </s>
-<s> no debemos permitir que la pugna entre la propuesta al consejo de que informe esta semana sobre la situación en oriente medio y que debata el tema con nosotros y la cuestión de chernóbil dé lugar a un aplazamiento de esta última . </s>
-<s> existen otras posibilidades . </s>
-<s> podemos escuchar al consejo sobre este punto el miércoles por la mañana , pero , por favor , no dentro de una semana . </s>
-<s> en el momento en que el mundo entero habla de chernóbil y de las repercusiones de esta catástrofe , nosotros no podemos , como parlamento , retraernos y no decir nada en absoluto . </s>
-<s> señor presidente , yo ya le he dado las razones para que el consejo haga una declaración sobre los bombardeos del líbano . </s>
-<s> cuando estamos hablando de la conferencia euromediterránea , a la que hemos dado el máximo relieve , no podemos desentendernos luego y dejar que las diplomacias de las potencias europeas intervengan en oriente próximo al margen de la necesaria concertación en el seno de la unión . </s>
-<s> sería una mala señal para la conferencia intergubernamental y para el futuro de la unión europea . </s>
-<s> pero opino , como la sra. roth , que no se puede hacer competir en el " libre mercado de los temas de la actualidad " el tema de chernóbil y el tema de oriente próximo . </s>
-<s> hay que buscar la posibilidad de discutir sobre los dos temas . </s>
-<s> señor presidente , tal como han dicho los colegas que se han expresado antes de mí , creo que es totalmente indispensable que el miércoles el consejo haga una declaración relativa a la situación en oriente medio y , en particular , en el sur del líbano . </s>
-<s> lo que ocurre allí nos preocupa y angustia , en la medida en que peligra todo el proceso de paz iniciado ahora hace unos cuantos meses . </s>
-<s> lo que importa ante todo es que cesen los combates y , por lo tanto , que se dé prioridad a las iniciativas diplomáticas , vengan de donde vengan , siempre que con estos esfuerzos se obtenga algún resultado , esto es , que los misiles y los cañones enmudezcan y que se progrese de verdad en el camino hacia la paz y la estabilidad en la región . </s>
-<s> asimismo debemos ocuparnos del líbano , que es un estado asolado desde hace años , bajo dominio de una potencia extranjera y , al mismo tiempo , con una parte de su territorio ocupado por otra potencia extranjera . </s>
-<s> debemos pues pronunciarnos al respecto , ya que estamos ante un tema de actualidad que , estoy convencido de ello , debe hacer que encontremos un hueco en nuestro orden del día del miércoles para situar esta declaración y el debate subsiguiente . </s>
-<s> señorías , estamos ante uno de esos casos para los que no existe solución satisfactoria porque siempre va en perjuicio de alguien y la decisión atenta siempre contra los intereses de alguien . </s>
-<s> les expondré la situación desde mi punto de vista . </s>
diff --git a/scripts/tests/epps-sample/giza.en-es/en-es.A3.final.gz b/scripts/tests/epps-sample/giza.en-es/en-es.A3.final.gz
deleted file mode 100644
index e3e49125d..000000000
--- a/scripts/tests/epps-sample/giza.en-es/en-es.A3.final.gz
+++ /dev/null
Binary files differ
diff --git a/scripts/tests/epps-sample/giza.es-en/es-en.A3.final.gz b/scripts/tests/epps-sample/giza.es-en/es-en.A3.final.gz
deleted file mode 100644
index 0d2cafc88..000000000
--- a/scripts/tests/epps-sample/giza.es-en/es-en.A3.final.gz
+++ /dev/null
Binary files differ
diff --git a/scripts/tests/mert-moses-parallel.test b/scripts/tests/mert-moses-parallel.test
deleted file mode 100755
index a6228ed68..000000000
--- a/scripts/tests/mert-moses-parallel.test
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-
-[ -d $WORKSPACE ] || exit 1
-echo "Workspace: $WORKSPACE"
-
-MOSESCMD=$WORKSPACE/moses-cmd/src/moses
-DATADIR=/export/ws06osmt/example/met-parallel
-export SCRIPTS_ROOTDIR=$WORKSPACE/scripts
-
-[ -e $MOSESCMD ] || exit 1
-echo "Moses: $MOSESCMD"
-
-
-cp $DATADIR/moses.ini ./ || exit 1
-cp $DATADIR/dev.input ./ || exit 1
-cp $DATADIR/dev.ref ./ || exit 1
-
-echo "Starting mert-moses"
-
-$SCRIPTS_ROOTDIR/training/mert-moses.pl --jobs=10 dev.input dev.ref $MOSESCMD ./moses.ini \
-|| exit 1
-
-echo "Success"
diff --git a/scripts/tests/mert-moses-serial.test b/scripts/tests/mert-moses-serial.test
deleted file mode 100755
index 29c7b199f..000000000
--- a/scripts/tests/mert-moses-serial.test
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-[ -d $WORKSPACE ] || exit 1
-
-MOSESCMD=$WORKSPACE/moses-cmd/src/moses
-DATADIR=/export/ws06osmt/example/met-parallel
-export SCRIPTS_ROOTDIR=$WORKSPACE/scripts
-
-[ -e $MOSESCMD ] || exit 1
-
-cp $DATADIR/moses.ini ./ || exit 1
-cp $DATADIR/dev.input ./ || exit 1
-cp $DATADIR/dev.ref ./ || exit 1
-
-echo "Starting mert-moses"
-
-$SCRIPTS_ROOTDIR/training/mert-moses.pl dev.input dev.ref $MOSESCMD ./moses.ini \
-|| exit 1
-
-echo "Success"
diff --git a/scripts/tests/train-factored-test-step3.test b/scripts/tests/train-factored-test-step3.test
deleted file mode 100755
index cf0648de8..000000000
--- a/scripts/tests/train-factored-test-step3.test
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-[ -d $WORKSPACE ] || exit 1
-echo "Workspace: $WORKSPACE"
-
-export SCRIPTS_ROOTDIR=$WORKSPACE/scripts
-
-cp -r ../epps-sample ./corpus || exit 1
-echo "Copied epps-sample files"
-
-echo "Starting training script."
-
-$SCRIPTS_ROOTDIR/training/train-factored-phrase-model.perl \
- --f es --e en \
- --translation-factors 0-0 \
- --decoding-steps t0 \
- --first-step 3 \
- --last-step 3 \
- --corpus corpus \
- --root corpus \
- || exit 1
-
-echo "Success."
diff --git a/scripts/tests/train-factored-test-step9.test b/scripts/tests/train-factored-test-step9.test
deleted file mode 100755
index 06fef86de..000000000
--- a/scripts/tests/train-factored-test-step9.test
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-[ -d $WORKSPACE ] || exit 1
-echo "Workspace: $WORKSPACE"
-
-export SCRIPTS_ROOTDIR=$WORKSPACE/scripts
-
-
-echo "fake" > lm0.3gr
-echo "fake" > lm0.4gr
-
-mkdir model || exit 1
-
-echo "Starting training script."
-
-$SCRIPTS_ROOTDIR/training/train-factored-phrase-model.perl \
- --f src --e tgt \
- --lm 0:3:lm0.3gr \
- --lm 0:4:lm0.4gr \
- --decoding-steps t0,g0,t1,g1,g2 \
- --translation-factors 0-0+1-1 \
- --generation-factors 0-0+0-1+0,1-2 \
- --first-step 9 \
- || exit 1
-
-echo "Success"
diff --git a/scripts/training/absolutize_moses_model.pl b/scripts/training/absolutize_moses_model.pl
deleted file mode 100755
index ee0ec981c..000000000
--- a/scripts/training/absolutize_moses_model.pl
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/perl
-# given a moses.ini file, prints a copy to stdout but replaces all relative
-# paths with absolute paths.
-#
-# Ondrej Bojar.
-
-my $ini = shift;
-die "usage: absolutize_moses_model.pl path-to-moses.ini > moses.abs.ini"
- if !defined $ini;
-
-open INI, $ini or die "Can't read $ini";
-while (<INI>) {
- if (/^\[([^\]]*)\]\s*$/) {
- $section = $1;
- }
- if (/^[0-9]/) {
- if ($section eq "ttable-file" || $section eq "lmodel-file") {
- chomp;
- my ($a, $b, $c, $fn) = split / /;
- $abs = ensure_absolute($fn, $ini);
- die "File not found or empty: $fn (interpreted as $abs)"
- if ! -s $abs;
- $_ = "$a $b $c $abs\n";
- }
- if ($section eq "generation-file") {
- chomp;
- my ($a, $b, $c, $fn) = split / /;
- $abs = ensure_absolute($fn, $ini);
- die "File not found or empty: $fn (interpreted as $abs)"
- if ! -s $abs;
- $_ = "$a $b $c $abs\n";
- }
- if ($section eq "distortion-file") {
- chomp;
- my $fn = $_;
- $abs = ensure_absolute($fn, $ini);
- die "File not found or empty: $fn (interpreted as $abs)"
- if ! -s $abs;
- $_ = "$abs\n";
- }
- }
- print $_;
-}
-close INI;
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
-sub ensure_absolute {
- my $target = shift;
- my $originfile = shift;
-
- my $cwd = `pwd`;
- chomp $cwd;
- $cwd.="/";
-
- my $absorigin = ensure_relative_to_origin($originfile, $cwd);
- return ensure_relative_to_origin($target, $absorigin);
-}
-
-sub ensure_relative_to_origin {
- my $target = shift;
- my $originfile = shift;
- return $target if $target =~ /^\/|^~/; # the target path is absolute already
- $originfile =~ s/[^\/]*$//; # where does the origin reside
- my $out = $originfile."/".$target;
- $out =~ s/\/+/\//g;
- $out =~ s/\/(\.\/)+/\//g;
- return $out;
-}
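The script's own usage message documents the invocation; for illustration (the model path is hypothetical):

    # Print a copy of an existing configuration with every relative
    # table path replaced by an absolute one.
    scripts/training/absolutize_moses_model.pl model/moses.ini > model/moses.abs.ini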
diff --git a/scripts/training/analyse_moses_model.pl b/scripts/training/analyse_moses_model.pl
deleted file mode 100755
index 47f549a00..000000000
--- a/scripts/training/analyse_moses_model.pl
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/perl
-# given a moses.ini file, checks the translation and generation tables and reports
-# statistics on ambiguity
-
-use warnings;
-use strict;
-use Getopt::Long;
-
-GetOptions(
-);
-
-my $ini = shift;
-die "usage!" if !defined $ini;
-
-open INI, $ini or die "Can't read $ini";
-my $section = undef;
-while (<INI>) {
- if (/^\[([^\]]*)\]\s*$/) {
- $section = $1;
- }
- if (/^[0-9]/) {
- if ($section eq "ttable-file") {
- chomp;
- my ($src, $tgt, $c, $fn) = split / /;
- # $fn = ensure_relative_to_origin($fn, $ini);
- my $ttstats = get_ttable_stats($fn);
- print_ttable_stats($src, $tgt, $fn, $ttstats);
- }
- if ($section eq "lmodel-file") {
- chomp;
- my ($a, $factor, $c, $fn) = split / /;
- # $fn = ensure_relative_to_origin($fn, $ini);
- my $lmstats = get_lmodel_stats($fn);
- print_lmodel_stats($factor, $fn, $lmstats);
- }
- if ($section eq "generation-file") {
- chomp;
- my ($src, $tgt, $c, $fn) = split / /;
- # $fn = ensure_relative_to_origin($fn, $ini);
- my $gstats = get_generation_stats($fn);
- print_generation_stats($src, $tgt, $fn, $gstats);
- }
- }
-}
-close INI;
-
-
-
-sub ensure_relative_to_origin {
- my $target = shift;
- my $originfile = shift;
- return $target if $target =~ /^\/|^~/; # the target path is absolute already
- $originfile =~ s/[^\/]*$//;
- return $originfile."/".$target;
-}
-
-
-sub get_ttable_stats {
- my $fn = shift;
- my $opn = $fn =~ /\.gz$/ ? "zcat $fn |" : $fn;
- open IN, $opn or die "Can't open $opn";
- my $totphrs = 0;
- my $srcphrs = 0;
- my $lastsrc = undef;
- while (<IN>) {
- chomp;
- my ($src, $tgt, undef) = split /\|\|\|/;
- $totphrs ++;
- next if defined $lastsrc && $src eq $lastsrc;
- $lastsrc = $src;
- $srcphrs ++;
- }
- die "No phrases in $fn!" if !$totphrs;
- return { "totphrs"=>$totphrs, "srcphrs"=>$srcphrs };
-}
-
-sub print_ttable_stats {
- my ($src, $tgt, $fn, $stat) = @_;
- print "Translation $src -> $tgt ($fn):\n";
- print " $stat->{totphrs}\tphrases total\n";
- printf " %.2f\tphrases per source phrase\n", $stat->{totphrs}/$stat->{srcphrs};
-}
-
-sub get_generation_stats {
- my $fn = shift;
- my $opn = $fn =~ /\.gz$/ ? "zcat $fn |" : $fn;
- open IN, $opn or die "Can't open $opn";
- my $totphrs = 0;
- my $srcphrs = 0;
- my $lastsrc = undef;
- while (<IN>) {
- chomp;
- my ($src, $tgt, undef) = split /\s+/;
- $totphrs ++;
- next if defined $lastsrc && $src eq $lastsrc;
- $lastsrc = $src;
- $srcphrs ++;
- }
- die "No items in $fn!" if !$totphrs;
- return { "tot"=>$totphrs, "src"=>$srcphrs };
-}
-
-sub print_generation_stats {
- my ($src, $tgt, $fn, $stat) = @_;
- print "Generation $src -> $tgt ($fn):\n";
- printf " %.2f\toutputs per source token\n", $stat->{tot}/$stat->{src};
-}
-
-sub get_lmodel_stats {
- my $fn = shift;
- my $opn = $fn =~ /\.gz$/ ? "zcat $fn |" : $fn;
- open IN, $opn or die "Can't open $opn";
- my %cnts;
- while (<IN>) {
- chomp;
- last if /^\\1-grams/;
- $cnts{$1} = $2 if /^ngram ([0-9]+)=([0-9]+)$/;
- }
- return { "ngrams"=>\%cnts };
-}
-
-sub print_lmodel_stats {
- my ($fact, $fn, $stat) = @_;
- print "Language model over $fact ($fn):\n";
- my @ngrams = sort {$a<=>$b} keys %{$stat->{ngrams}};
- print " ".join("\t", @ngrams)."\n";
- print " ".join("\t", map {$stat->{ngrams}->{$_}} @ngrams)."\n";
-}
-
-
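A hypothetical invocation, following the script's single-argument usage; it prints per-table ambiguity statistics for every ttable-file, lmodel-file and generation-file entry in the configuration:

    # Report phrase-table, language-model and generation-table statistics
    # for a trained model (the path is hypothetical).
    scripts/training/analyse_moses_model.pl model/moses.ini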
diff --git a/scripts/training/build-generation-table.perl b/scripts/training/build-generation-table.perl
deleted file mode 100755
index 372b1f352..000000000
--- a/scripts/training/build-generation-table.perl
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Getopt::Long "GetOptions";
-
-my $_CORPUS;
-my $_OUTPUT = "generation";
-my $_GENERATION_FACTORS;
-
-die "specify options" unless &GetOptions('corpus=s' => \$_CORPUS,
- 'output=s' => \$_OUTPUT,
- 'generation-factors=s' => \$_GENERATION_FACTORS);
-
-
-die "Please use --corpus to specify the factored input corpus\n" unless $_CORPUS;
-
-if (! defined $_GENERATION_FACTORS) {
- die "Please use --generation-factors to set generation factors\n";
-}
-
-my $___GENERATION_FACTORS = $_GENERATION_FACTORS || "0-0";
-die("format for generation factors is \"0-1\" or \"0-1+0-2\" or \"0-1+0,1-1,2\", you provided $___GENERATION_FACTORS\n")
- if $___GENERATION_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;
-
-print "output=$_OUTPUT.<factor-map>\n";
-
-get_generation_factored();
-print "Done\n";
-exit 0;
-
-sub get_generation_factored {
- print STDERR "(8) learn generation model @ ".`date`;
- foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
- my $factor = $f;
- my ($factor_e_source,$factor_e) = split(/\-/,$factor);
- &get_generation($factor, $factor_e_source, $factor_e);
- }
-}
-
-
-sub get_generation {
- my ($factor, $factor_e_source, $factor_e) = @_;
-
- print STDERR "(8) [$factor] generate generation table @ ".`date`;
- my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
-
- my %INCLUDE_SOURCE;
- foreach my $factor (split(/,/,$factor_e_source)) {
-
- $INCLUDE_SOURCE{$factor} = 1;
- }
- my %INCLUDE;
- foreach my $factor (split(/,/,$factor_e)) {
- $INCLUDE{$factor} = 1;
- }
-
- my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
- open(E,$_CORPUS) or die "Can't read ".$_CORPUS;
- while(<E>) {
- chomp;
- foreach (split) {
- my @FACTOR = split(/\|/);
-
- my ($source,$target);
- my $first_factor = 1;
- foreach my $factor (split(/,/,$factor_e_source)) {
- $source .= "|" unless $first_factor;
- $first_factor = 0;
- $source .= $FACTOR[$factor];
- }
-
- $first_factor = 1;
- foreach my $factor (split(/,/,$factor_e)) {
- $target .= "|" unless $first_factor;
- $first_factor = 0;
- $target .= $FACTOR[$factor];
- }
- $GENERATION{$source}{$target}++;
- $GENERATION_TOTAL_SOURCE{$source}++;
- $GENERATION_TOTAL_TARGET{$target}++;
- }
- }
- close(E);
-
- open(GEN,">$_OUTPUT.$factor") or die "Can't write $_OUTPUT.$factor";
- foreach my $source (keys %GENERATION) {
- foreach my $target (keys %{$GENERATION{$source}}) {
- printf GEN ("%s %s %.7f %.7f\n",$source,$target,
- $GENERATION{$source}{$target}/$GENERATION_TOTAL_SOURCE{$source},
- $GENERATION{$source}{$target}/$GENERATION_TOTAL_TARGET{$target});
- }
- }
- close(GEN);
- safesystem("rm -f $_OUTPUT.$factor.gz") or die;
- safesystem("gzip $_OUTPUT.$factor") or die;
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
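The options match the GetOptions block above: --corpus and --generation-factors are required, --output defaults to "generation", and the output table is gzipped. A hypothetical run that learns a table mapping factor 0 to factor 1:

    # Build model/generation.0-1.gz from a factored corpus
    # (file names are hypothetical; the factor syntax is the one the
    # script itself accepts, e.g. 0-1 or 0-1+0-2).
    scripts/training/build-generation-table.perl \
        --corpus corpus/epps.factored.en \
        --output model/generation \
        --generation-factors 0-1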
diff --git a/scripts/training/clean-corpus-n.perl b/scripts/training/clean-corpus-n.perl
deleted file mode 100755
index 40c6362f2..000000000
--- a/scripts/training/clean-corpus-n.perl
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-use Getopt::Long;
-my $help;
-my $lc = 0; # lowercase the corpus?
-my $enc = "utf8"; # encoding of the input and output files
- # set to anything else you wish, but I have not tested it yet
-
-GetOptions(
- "help" => \$help,
- "lowercase|lc" => \$lc,
-  "encoding=s" => \$enc,
-) or exit(1);
-
-if (scalar(@ARGV) != 6 || $help) {
- print "syntax: clean-corpus-n.perl corpus l1 l2 clean-corpus min max\n";
- exit;
-}
-
-my $corpus = $ARGV[0];
-my $l1 = $ARGV[1];
-my $l2 = $ARGV[2];
-my $out = $ARGV[3];
-my $min = $ARGV[4];
-my $max = $ARGV[5];
-
-print STDERR "clean-corpus.perl: processing $corpus.$l1 & .$l2 to $out, cutoff $min-$max\n";
-
-open(F,"$corpus.$l1") or die "Can't read $corpus.$l1";
-open(E,"$corpus.$l2") or die "Can't read $corpus.$l2";
-open(FO,">$out.$l1") or die "Can't write $out.$l1";
-open(EO,">$out.$l2") or die "Can't write $out.$l2";
-
-# necessary for proper lowercasing
-my $binmode;
-if ($enc eq "utf8") {
- $binmode = ":utf8";
-} else {
- $binmode = ":encoding($enc)";
-}
-binmode(F, $binmode);
-binmode(E, $binmode);
-binmode(FO, $binmode);
-binmode(EO, $binmode);
-
-my $innr = 0;
-my $outnr = 0;
-while(my $f = <F>) {
- $innr++;
- print STDERR "." if $innr % 10000 == 0;
- print STDERR "($innr)" if $innr % 100000 == 0;
- my $e = <E>;
- die "$corpus.$l2 is too short!" if !defined $e;
- chomp($e);
- chomp($f);
-
- #if lowercasing, lowercase
- if ($lc) {
- $e = lc($e);
- $f = lc($f);
- }
-
- # $e =~ s/\|//g; # kinda hurts in factored input
- $e =~ s/\s+/ /g;
- $e =~ s/^ //;
- $e =~ s/ $//;
- # $f =~ s/\|//g; # kinda hurts in factored input
- $f =~ s/\s+/ /g;
- $f =~ s/^ //;
- $f =~ s/ $//;
- next if $f eq '';
- next if $e eq '';
- my @E = split(/ /,$e);
- my @F = split(/ /,$f);
- next if scalar(@E) > $max;
- next if scalar(@F) > $max;
- next if scalar(@E) < $min;
- next if scalar(@F) < $min;
- next if scalar(@E)/scalar(@F) > 9;
- next if scalar(@F)/scalar(@E) > 9;
-
- # An extra check: none of the factors can be blank!
- die "There is a blank factor in $corpus.$l1 on line $innr: $f"
- if $f =~ /[ \|]\|/;
-  die "There is a blank factor in $corpus.$l2 on line $innr: $e"
- if $e =~ /[ \|]\|/;
-
-
- $outnr++;
- print FO $f."\n";
- print EO $e."\n";
-}
-print STDERR "\n";
-my $e = <E>;
-die "$corpus.$l2 is too long!" if defined $e;
-
-print STDERR "Input sentences: $innr Output sentences: $outnr\n";
diff --git a/scripts/training/clone_moses_model.pl b/scripts/training/clone_moses_model.pl
deleted file mode 100755
index 2395284cb..000000000
--- a/scripts/training/clone_moses_model.pl
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/perl
-# given a moses.ini file, creates a wiseln of it and all the included bits
-# in the current directory
-
-# relies on wiseln, a wise variant of linking. You might just use ln -s instead.
-
-use strict;
-use Getopt::Long;
-
-my @fixpath = ();
- # specify search-replace pattern to fix paths.
- # use a space to delimit source and target pathnames
-GetOptions(
- "fixpath=s" => \@fixpath,
-);
-my @fixrepls = map {
- my ($fixsrc, $fixtgt) = split / /, $_;
- print STDERR "Will replace >$fixsrc< with >$fixtgt<\n";
- [ $fixsrc, $fixtgt ];
- } @fixpath;
-
-my $ini = shift;
-die "usage!" if !defined $ini;
-
-my %cnt; # count files per section
-open INI, $ini or die "Can't read $ini";
-open OUT, ">moses.ini" or die "Can't write ./moses.ini";
-my $section = undef;
-while (<INI>) {
- if (/^\[([^\]]*)\]\s*$/) {
- $section = $1;
- }
- if (/^[0-9]/) {
- if ($section eq "ttable-file" || $section eq "lmodel-file") {
- chomp;
- my ($a, $b, $c, $fn) = split / /;
- $cnt{$section}++;
- my $suffix = ($fn =~ /\.gz$/ ? ".gz" : "");
- $fn = fixpath($fn);
- $fn = ensure_relative_to_origin($fn, $ini);
- safesystem("wiseln $fn ./$section.$cnt{$section}$suffix") or die;
- $_ = "$a $b $c ./$section.$cnt{$section}$suffix\n";
- }
- if ($section eq "generation-file") {
- chomp;
- my ($a, $b, $c, $fn) = split / /;
- $cnt{$section}++;
- my $suffix = ($fn =~ /\.gz$/ ? ".gz" : "");
- $fn = fixpath($fn);
- safesystem("wiseln $fn ./$section.$cnt{$section}$suffix") or die;
- $_ = "$a $b $c ./$section.$cnt{$section}$suffix\n";
- }
- if ($section eq "distortion-file") {
- chomp;
- my $fn = $_;
- $cnt{$section}++;
- my $suffix = ($fn =~ /\.gz$/ ? ".gz" : "");
- $fn = fixpath($fn);
- $fn = ensure_relative_to_origin($fn, $ini);
- safesystem("wiseln $fn ./$section.$cnt{$section}$suffix") or die;
- $_ = "./$section.$cnt{$section}$suffix\n";
- }
- }
- print OUT $_;
-}
-close INI;
-close OUT;
-
-
-sub fixpath {
- my $fn = shift;
- foreach my $pair (@fixrepls) {
- $fn =~ s/$pair->[0]/$pair->[1]/g;
- }
- return $fn;
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-
-sub ensure_relative_to_origin {
- my $target = shift;
- my $originfile = shift;
- return $target if $target =~ /^\/|^~/; # the target path is absolute already
- $originfile =~ s/[^\/]*$//;
- return $originfile."/".$target;
-}
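A hypothetical use, run from an empty directory that is to receive the cloned model; --fixpath takes a single space-separated "search replacement" pair and may be repeated:

    # Link (via wiseln) every table referenced by an existing moses.ini
    # into the current directory and write a local ./moses.ini; the
    # --fixpath pair rewrites an obsolete prefix (all paths hypothetical).
    scripts/training/clone_moses_model.pl \
        --fixpath "/old/export /export" \
        /path/to/model/moses.ini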
diff --git a/scripts/training/cmert-0.5/Makefile b/scripts/training/cmert-0.5/Makefile
deleted file mode 100755
index d2f446443..000000000
--- a/scripts/training/cmert-0.5/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-CC=gcc
-OBJS=mert.o data.o point.o score.o
-CFLAGS=-O3
-LDFLAGS=
-LDLIBS=-lm
-
-mert: $(OBJS)
- gcc $(OBJS) $(LDLIBS) -o $@
-
-mert_p: $(OBJS)
- gcc $(LDFLAGS) $(OBJS) $(LDLIBS) -o $@
diff --git a/scripts/training/cmert-0.5/README b/scripts/training/cmert-0.5/README
deleted file mode 100755
index 0642abf47..000000000
--- a/scripts/training/cmert-0.5/README
+++ /dev/null
@@ -1,9 +0,0 @@
-CMERT 0.5
-5 Nov 2005
-Copyright (c) 2005 David Chiang. All rights reserved (for now).
-
-Minimalist installation instructions:
-
-- make
-- set #! lines and sys.path lines in Python scripts
-- see run-cmert for example
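A sketch of those steps, assuming python2.3 is the interpreter the scripts expect (their #! lines say so) and that the checkout itself is the installation directory:

    # Build the C mert binary, then fix up the Python helpers by hand.
    cd scripts/training/cmert-0.5
    make
    # adjust the '#!' lines and the sys.path.append(...) lines in
    # bleu.py / dataset.py to match the local installation, and see
    # run-cmert for a worked example, as the README says.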
diff --git a/scripts/training/cmert-0.5/bleu.py b/scripts/training/cmert-0.5/bleu.py
deleted file mode 100755
index 6a8dd42b0..000000000
--- a/scripts/training/cmert-0.5/bleu.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/python2.3
-
-'''Provides:
-
-cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
-cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
-score_cooked(alltest, n=4): Score a list of cooked test sentences.
-
-score_set(s, testid, refids, n=4): Interface with dataset.py; calculate BLEU score of testid against refids.
-
-The reason for breaking the BLEU computation into three phases cook_refs(), cook_test(), and score_cooked() is to allow the caller to calculate BLEU scores for multiple test sets as efficiently as possible.
-'''
-
-import optparse
-import sys, math, re, xml.sax.saxutils
-sys.path.append('/fs/clip-mteval/Programs/hiero')
-import dataset
-import log
-
-# Added to bypass NIST-style pre-processing of hyp and ref files -- wade
-nonorm = 0
-
-preserve_case = False
-eff_ref_len = "shortest"
-
-normalize1 = [
- ('<skipped>', ''), # strip "skipped" tags
- (r'-\n', ''), # strip end-of-line hyphenation and join lines
- (r'\n', ' '), # join lines
-# (r'(\d)\s+(?=\d)', r'\1'), # join digits
-]
-normalize1 = [(re.compile(pattern), replace) for (pattern, replace) in normalize1]
-
-normalize2 = [
- (r'([\{-\~\[-\` -\&\(-\+\:-\@\/])',r' \1 '), # tokenize punctuation. apostrophe is missing
- (r'([^0-9])([\.,])',r'\1 \2 '), # tokenize period and comma unless preceded by a digit
- (r'([\.,])([^0-9])',r' \1 \2'), # tokenize period and comma unless followed by a digit
- (r'([0-9])(-)',r'\1 \2 ') # tokenize dash when preceded by a digit
-]
-normalize2 = [(re.compile(pattern), replace) for (pattern, replace) in normalize2]
-
-def normalize(s):
- '''Normalize and tokenize text. This is lifted from NIST mteval-v11a.pl.'''
- # Added to bypass NIST-style pre-processing of hyp and ref files -- wade
- if (nonorm):
- return s.split()
- if type(s) is not str:
- s = " ".join(s)
- # language-independent part:
- for (pattern, replace) in normalize1:
- s = re.sub(pattern, replace, s)
- s = xml.sax.saxutils.unescape(s, {'&quot;':'"'})
- # language-dependent part (assuming Western languages):
- s = " %s " % s
- if not preserve_case:
- s = s.lower() # this might not be identical to the original
- for (pattern, replace) in normalize2:
- s = re.sub(pattern, replace, s)
- return s.split()
-
-def count_ngrams(words, n=4):
- counts = {}
- for k in xrange(1,n+1):
- for i in xrange(len(words)-k+1):
- ngram = tuple(words[i:i+k])
- counts[ngram] = counts.get(ngram, 0)+1
- return counts
-
-def cook_refs(refs, n=4):
- '''Takes a list of reference sentences for a single segment
- and returns an object that encapsulates everything that BLEU
- needs to know about them.'''
-
- refs = [normalize(ref) for ref in refs]
- maxcounts = {}
- for ref in refs:
- counts = count_ngrams(ref, n)
- for (ngram,count) in counts.iteritems():
- maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
- return ([len(ref) for ref in refs], maxcounts)
-
-def cook_test(test, (reflens, refmaxcounts), n=4):
- '''Takes a test sentence and returns an object that
- encapsulates everything that BLEU needs to know about it.'''
-
- test = normalize(test)
- result = {}
- result["testlen"] = len(test)
-
- # Calculate effective reference sentence length.
-
- if eff_ref_len == "shortest":
- result["reflen"] = min(reflens)
- elif eff_ref_len == "average":
- result["reflen"] = float(sum(reflens))/len(reflens)
-
- # Original:
- '''min_diff = None
- for reflen in reflens:
- if min_diff is None or abs(reflen-len(test)) < min_diff:
- min_diff = abs(reflen-len(test))
- result['reflen'] = reflen'''
-
- result["guess"] = [len(test)-k+1 for k in xrange(1,n+1)]
-
- result['correct'] = [0]*n
- counts = count_ngrams(test, n)
- for (ngram, count) in counts.iteritems():
- result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count)
-
- return result
-
-def score_cooked(allcomps, n=4):
- totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n}
- for comps in allcomps:
- for key in ['testlen','reflen']:
- totalcomps[key] += comps[key]
- for key in ['guess','correct']:
- for k in xrange(n):
- totalcomps[key][k] += comps[key][k]
- logbleu = 0.0
- for k in xrange(n):
- if totalcomps['correct'][k] == 0:
- return 0.0
- log.write("%d-grams: %f\n" % (k,float(totalcomps['correct'][k])/totalcomps['guess'][k]))
- logbleu += math.log(totalcomps['correct'][k])-math.log(totalcomps['guess'][k])
- logbleu /= float(n)
- log.write("Effective reference length: %d test length: %d\n" % (totalcomps['reflen'], totalcomps['testlen']))
- logbleu += min(0,1-float(totalcomps['reflen'])/totalcomps['testlen'])
- return math.exp(logbleu)
-
-def score_set(set, testid, refids, n=4):
- alltest = []
- for seg in set.segs():
- try:
- test = seg.versions[testid].words
- except KeyError:
- log.write("Warning: missing test sentence\n")
- continue
- try:
- refs = [seg.versions[refid].words for refid in refids]
- except KeyError:
- log.write("Warning: missing reference sentence, %s\n" % seg.id)
- refs = cook_refs(refs, n)
- alltest.append(cook_test(test, refs, n))
- log.write("%d sentences\n" % len(alltest))
- return score_cooked(alltest, n)
-
-if __name__ == "__main__":
- import psyco
- psyco.full()
-
- import getopt
- raw_test = False
- (opts,args) = getopt.getopt(sys.argv[1:], "rc", [])
- for (opt,parm) in opts:
- if opt == "-r":
- raw_test = True
- elif opt == "-c":
- preserve_case = True
-
- s = dataset.Dataset()
- if args[0] == '-':
- infile = sys.stdin
- else:
- infile = args[0]
- if raw_test:
- (root, testids) = s.read_raw(infile, docid='whatever', sysid='testsys')
- else:
- (root, testids) = s.read(infile)
- print "Test systems: %s" % ", ".join(testids)
- (root, refids) = s.read(args[1])
- print "Reference systems: %s" % ", ".join(refids)
-
- for testid in testids:
- print "BLEU score: ", score_set(s, testid, refids)
-
-
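Based on the __main__ block above, bleu.py is also usable standalone; a hypothetical invocation (note the script's hard-coded sys.path entry and the psyco import, which must resolve locally):

    # Score one or more test systems in an SGML file against SGML references.
    python2.3 bleu.py test.sgm ref.sgm
    # With -r the first argument is raw text, one segment per line;
    # -c preserves case during normalization.
    python2.3 bleu.py -r hyp.txt ref.sgm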
diff --git a/scripts/training/cmert-0.5/data.c b/scripts/training/cmert-0.5/data.c
deleted file mode 100755
index e1c56fa31..000000000
--- a/scripts/training/cmert-0.5/data.c
+++ /dev/null
@@ -1,92 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "data.h"
-#include "point.h"
-
-extern int comps_n;
-
-data_t *read_data(void) {
- FILE *fp;
- static char buf[1000];
- char *tok, *s;
- int field;
- int sent_i, cand_i, cands_n;
- int total_cands_n;
- data_t *data;
- candidate_t *cands;
-
- data = malloc(sizeof(data_t));
-
- data->sents_max = 100;
- data->sents_n = 0;
- data->cands_n = malloc(data->sents_max*sizeof(int));
-
- total_cands_n = 0;
-
- fp = fopen("cands.opt", "r");
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- // should we check to make sure every sentence is accounted for?
- sscanf(buf, "%d %d", &sent_i, &cands_n);
- if (sent_i >= data->sents_n)
- data->sents_n = sent_i+1;
- if (sent_i >= data->sents_max) {
- data->sents_max = (sent_i+1)*2;
- data->cands_n = realloc(data->cands_n, data->sents_max*sizeof(int));
- }
- data->cands_n[sent_i] = cands_n;
- total_cands_n += cands_n;
- }
- fclose(fp);
-
- /* create master array for candidates and then set data->sents
- to point into it */
- cands = malloc(total_cands_n * sizeof(candidate_t));
- data->sents = malloc(data->sents_n * sizeof(candidate_t *));
- total_cands_n = 0;
- for (sent_i=0; sent_i<data->sents_n; sent_i++) {
- data->sents[sent_i] = cands+total_cands_n;
- total_cands_n += data->cands_n[sent_i];
- }
-
-
- cand_i = 0;
- fp = fopen("feats.opt", "r");
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- cands[cand_i].features = malloc(dim*sizeof(float));
- cands[cand_i].comps = malloc(comps_n*sizeof(int));
-
- field = 0;
- s = buf;
- while ((tok = strsep(&s, " \t\n")) != NULL) {
- if (!*tok) // empty token
- continue;
- // read dim floats and then comps_n ints
- if (field < dim)
- cands[cand_i].features[field] = -strtod(tok, NULL); // Venugopal format uses costs
- else if (field < dim+comps_n)
- cands[cand_i].comps[field-dim] = strtol(tok, NULL, 10);
- else {
- fprintf(stderr, "read_data(): too many fields in line in feats.opt\n");
- return NULL;
- }
- field++;
- }
- if (field != dim+comps_n) {
- fprintf(stderr, "read_data(): wrong number of fields in line in feats.opt\n");
- return NULL;
- }
- cand_i++;
- }
-
- if (cand_i != total_cands_n) {
- fprintf(stderr, "read_data(): wrong number of lines in cands.opt\n");
- return NULL;
- }
-
- fclose(fp);
-
- return data;
-}
-
diff --git a/scripts/training/cmert-0.5/data.h b/scripts/training/cmert-0.5/data.h
deleted file mode 100755
index 1a17d15c3..000000000
--- a/scripts/training/cmert-0.5/data.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef DATA_H
-#define DATA_H
-
-typedef struct {
- float *features;
- int *comps;
- float m, b; // slope and intercept, used as scratch space
-} candidate_t;
-
-typedef struct {
- candidate_t **sents;
- int sents_n, sents_max, *cands_n;
-} data_t;
-
-data_t *read_data(void);
-
-#endif
diff --git a/scripts/training/cmert-0.5/dataset.py b/scripts/training/cmert-0.5/dataset.py
deleted file mode 100755
index 8300393d0..000000000
--- a/scripts/training/cmert-0.5/dataset.py
+++ /dev/null
@@ -1,391 +0,0 @@
-#!/usr/bin/python2.3
-
-'''Decoder interface:
-
-Dataset.process() expects a function, which in turn takes a Sentence as input
-and produces a Sentence or list of Sentences as output.
-
-The input Sentence will be marked with the <seg> tag it was found in
-the input file with.
-
-The output Sentences should be marked with <seg> tags if they are to
-be marked as such in the output file.
-'''
-
-import sys, sgmllib, xml.sax.saxutils, log
-
-def attrs_to_str(d):
- if len(d) == 0:
- return ""
- l = [""]+["%s=%s" % (name, xml.sax.saxutils.quoteattr(value)) for (name, value) in d]
- return " ".join(l)
-
-def attrs_to_dict(a):
- d = {}
- for (name, value) in a:
- if d.has_key(name.lower()):
- raise ValueError, "duplicate attribute names"
- d[name.lower()] = value
- return d
-
-def strip_newlines(s):
- return " ".join(s.split())
-
-class Sentence(object):
- def __init__(self, words=None, meta=None):
- if words is not None:
- self.words = list(words)
- else:
- self.words = []
- if meta is not None:
- self.meta = meta
- else:
- self.meta = []
-
- def mark(self, tag, attrs):
- self.meta.append((tag, attrs, 0, len(self.words)))
-
- def getmark(self):
- if len(self.meta) > 0:
- (tag, attrs, i, j) = self.meta[-1]
- if i == 0 and j == len(self.words):
- return (tag, attrs)
- else:
- return None
- else:
- return None
-
- def unmark(self):
- mark = self.getmark()
- if mark is not None:
- self.meta = self.meta[:-1]
- return mark
-
- def __cmp__(self, other):
- return cmp((self.words, self.meta), (other.words, other.meta))
-
- def __str__(self):
- def cmp_spans((tag1,attr1,i1,j1),(tag2,attr2,i2,j2)):
- if i1==i2<=j1==j2:
- return 0
- elif i2<=i1<=j1<=j2:
- return -1
- elif i1<=i2<=j2<=j1:
- return 1
- else:
- return cmp((i1,j1),(i2,j2)) # don't care
- # this guarantees that equal spans will come out nested
- # we want the later spans to be outer
- # this relies on stable sort
- open = [[] for i in xrange(len(self.words)+1)]
- # there seems to be a bug still with empty spans
- empty = [[] for i in xrange(len(self.words)+1)]
- close = [[] for j in xrange(len(self.words)+1)]
- for (tag,attrs,i,j) in sorted(self.meta, cmp=cmp_spans):
- if i == j:
- # do we want these to nest?
- empty[i].append("<%s%s/>" % (tag, attrs_to_str(attrs)))
- open[i].append("<%s%s>" % (tag, attrs_to_str(attrs)))
- close[j].append("</%s>" % tag)
-
- result = []
- if len(empty[0]) > 0:
- result.extend(empty[0])
- for i in xrange(len(self.words)):
- if i > 0:
- result.append(" ")
- result.extend(reversed(open[i]))
- result.append(self.words[i])
- result.extend(close[i+1])
- if len(empty[i+1]) > 0:
- result.extend(empty[i+1])
-
- return "".join(result)
-
- def __add__(self, other):
- if type(other) in (list, tuple):
- return Sentence(self.words + list(other), self.meta)
- else:
- othermeta = [(tag, attrs, i+len(self.words), j+len(self.words)) for (tag, attrs, i, j) in other.meta]
- return Sentence(self.words + other.words, self.meta+othermeta)
-
-def read_raw(f):
- """Read a raw file into a list of Sentences."""
- if type(f) is str:
- f = file(f, "r")
- inputs = []
- i = 0
- for line in f:
- sent = process_sgml_line(line, i)
- sent.mark('seg', [('id',str(i))])
- inputs.append(sent)
- i += 1
- return inputs
-
-class Dataset(object):
- def __init__(self, id=None):
- self.id = id
- self.docs = {}
- self.sysids = []
- self.langs = {}
-
- def read(self, f):
- '''Read a file into the dataset. Returns (root, sysids)'''
- if type(f) is str:
- f = file(f, "r")
- p = DatasetParser(self)
- p.feed(f.read())
- p.close()
- return (p.root,p.sysids)
-
- def read_raw(self, f, docid, setid=None, sysid=None, lang=None):
- """Read a raw file into the dataset."""
- if setid is not None:
- if self.id is not None and self.id != setid:
- raise ValueError, "Set ID does not match"
- else:
- self.id = setid
- if sysid not in self.sysids:
- self.sysids.append(sysid)
- self.langs[sysid] = lang
- if type(f) is str:
- f = file(f, "r")
- doc = self.docs.setdefault(docid, Document(docid))
- i = 0
- for line in f:
- if len(doc.segs)-1 < i:
- doc.segs.append(Segment(i))
- if doc.segs[i].versions.has_key(sysid):
- raise ValueError, "multiple versions from same system"
- doc.segs[i].versions[sysid] = process_sgml_line(line, i)
- doc.segs[i].versions[sysid].mark('seg', [('id',str(i))])
- i += 1
- return (None, [sysid])
-
- def write(self, f, tag, sysids=None):
- if type(f) is str:
- f = file(f, "w")
- f.write(self.string(tag, sysids))
-
- def write_raw(self, f, sysid=None):
- if type(f) is str:
- f = file(f, "w")
- for seg in self.segs():
- f.write(" ".join(seg.versions[sysid].words))
- f.write("\n")
-
- def string(self, tag, sysids=None):
- if sysids is None:
- sysids = self.sysids
- elif type(sysids) is str:
- sysids = [sysids]
- attrs = [('setid', self.id)]
- if self.langs.has_key(None):
- attrs.append(('srclang', self.langs[None]))
- trglangs = [self.langs[sysid] for sysid in sysids if sysid is not None]
- for lang in trglangs[1:]:
- if lang != trglangs[0]:
- raise ValueError, "Inconsistent target language"
- if len(trglangs) >= 1:
- attrs.append(('trglang', trglangs[0]))
-
- return "<%s%s>\n%s</%s>\n" % (tag,
- attrs_to_str(attrs),
- "".join([doc.string(sysid) for doc in self.docs.values() for sysid in sysids]),
- tag)
-
- def process(self, processor, sysid, lang, srcsysid=None):
- if sysid in self.sysids:
- raise ValueError, "sysid already in use"
- else:
- self.sysids.append(sysid)
- self.langs[sysid] = lang
- for seg in self.segs():
- if log.level >= 2:
- sys.stderr.write("Input: %s\n" % str(seg.versions[srcsysid]))
- seg.versions[sysid] = processor(seg.versions[srcsysid])
- if log.level >= 2:
- if type(seg.versions[sysid]) is not list:
- sys.stderr.write("Output: %s\n" % str(seg.versions[sysid]))
- else:
- sys.stderr.write("Output (1st): %s\n" % str(seg.versions[sysid][0]))
-
- def segs(self):
- for doc in self.docs.values():
- for seg in doc.segs:
- yield seg
-
-class Document(object):
- def __init__(self, id):
- self.id = id
- self.segs = []
-
- def string(self, sysid):
- attrs = [('docid', self.id)]
- if sysid is not None:
- attrs.append(('sysid', sysid))
- return "<doc%s>\n%s</doc>\n" % (attrs_to_str(attrs),
- "".join([seg.string(sysid) for seg in self.segs]))
-
-class Segment(object):
- def __init__(self, id=None):
- self.id = id
- self.versions = {}
-
- def string(self, sysid):
- v = self.versions[sysid]
- if type(v) is not list:
- v = [v]
- output = []
- for i in xrange(len(v)):
- output.append(str(v[i]))
- output.append('\n')
- return "".join(output)
-
-def process_sgml_line(line, id=None):
- p = DatasetParser(None)
- p.pos = 0
- p.words = []
- p.meta = []
- p.feed(line)
- p.close()
- sent = Sentence(p.words, p.meta)
- return sent
-
-class DatasetParser(sgmllib.SGMLParser):
- def __init__(self, set):
- sgmllib.SGMLParser.__init__(self)
- self.words = None
- self.sysids = []
- self.set = set
- self.mystack = []
-
- def handle_starttag(self, tag, method, attrs):
- thing = method(attrs)
- self.mystack.append(thing)
-
- def handle_endtag(self, tag, method):
- thing = self.mystack.pop()
- method(thing)
-
- def unknown_starttag(self, tag, attrs):
- thing = self.start(tag, attrs)
- self.mystack.append(thing)
-
- def unknown_endtag(self, tag):
- thing = self.mystack.pop()
- self.end(tag, thing)
-
- def start_srcset(self, attrs):
- attrs = attrs_to_dict(attrs)
- if self.set.id is None:
- self.set.id = attrs['setid']
- if 0 and self.set.id != attrs['setid']:
- raise ValueError, "Set ID does not match"
- self.lang = attrs['srclang']
- self.root = 'srcset'
- return None
-
- def start_refset(self, attrs):
- attrs = attrs_to_dict(attrs)
- if self.set.id is None:
- self.set.id = attrs['setid']
- if 0 and self.set.id != attrs['setid']:
- raise ValueError, "Set ID does not match"
- if self.set.langs.setdefault(None, attrs['srclang']) != attrs['srclang']:
- raise ValueError, "Source language does not match"
- self.lang = attrs['trglang']
- self.root = 'refset'
- return None
-
- def start_tstset(self, attrs):
- attrs = attrs_to_dict(attrs)
- if self.set.id is None:
- self.set.id = attrs['setid']
- if 0 and self.set.id != attrs['setid']:
- raise ValueError, "Set ID does not match"
- if 0 and self.set.langs.setdefault(None, attrs['srclang']) != attrs['srclang']:
- raise ValueError, "Source language does not match"
- self.lang = attrs['trglang']
- self.root = 'tstset'
- return None
-
- def end_srcset(self, thing):
- for sysid in self.sysids:
- if sysid not in self.set.sysids:
- self.set.sysids.append(sysid)
- self.set.langs[sysid] = self.lang
- end_refset = end_tstset = end_srcset
-
- def start_doc(self, attrs):
- attrs = attrs_to_dict(attrs)
- self.doc = self.set.docs.setdefault(attrs['docid'], Document(attrs['docid']))
- self.seg_i = 0
- if self.root == 'srcset':
- self.sysid = None
- else:
- self.sysid = attrs['sysid']
- if self.sysid not in self.sysids:
- self.sysids.append(self.sysid)
- return None
-
- def end_doc(self, thing):
- pass
-
- def start_seg(self, attrs):
- thing = ('seg', attrs, 0, None)
- attrs = attrs_to_dict(attrs)
- if len(self.doc.segs)-1 < self.seg_i:
- self.doc.segs.append(Segment(attrs.get('id', None)))
- self.seg = self.doc.segs[self.seg_i]
- if 0 and self.seg.id is not None and attrs.has_key('id') and self.seg.id != attrs['id']:
- raise ValueError, "segment ids do not match (%s != %s)" % (str(self.seg.id), str(attrs.get('id', None)))
- if self.seg.versions.has_key(self.sysid):
- raise ValueError, "multiple versions from same system"
- self.pos = 0
- self.words = []
- self.meta = []
- return thing
-
- def end_seg(self, thing):
- (tag, attrs, i, j) = thing
- self.meta.append((tag, attrs, i, self.pos))
- self.seg_i += 1
- self.seg.versions[self.sysid] = Sentence(self.words, self.meta)
- self.words = None
-
- """# Special case for start and end of sentence
- def start_s(self, attrs):
- if self.words is not None:
- self.pos += 1
- self.words.append('<s>')
- return None
-
- def end_s(self, thing):
- if self.words is not None:
- self.pos += 1
- self.words.append('</s>')"""
-
- def start(self, tag, attrs):
- if self.words is not None:
- return (tag, attrs, self.pos, None)
- else:
- return None
-
- def end(self, tag, thing):
- if self.words is not None:
- (tag, attrs, i, j) = thing
- self.meta.append((tag, attrs, i, self.pos))
-
- def handle_data(self, s):
- if self.words is not None:
- words = s.split()
- self.pos += len(words)
- self.words.extend(words)
-
-if __name__ == "__main__":
- s = Dataset()
-
- for filename in sys.argv[1:]:
- s.read_raw(filename, 'whatever', 'whatever', filename, 'English')
- s.write(sys.stdout, 'tstset')
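
The script above is self-contained: Sentence carries the tokens plus span marks, Dataset/Document/Segment hold one version of each segment per system id, and Dataset.process() applies an arbitrary processor callable to every segment. A minimal sketch of such a processor, using only the API defined above (the function name, the uppercasing behaviour, and the file names in the commented driver are illustrative, not part of the original script):

    # A hypothetical processor: copies the words, uppercases them, and
    # re-applies the outermost <seg> mark so it survives into the output.
    def uppercase_processor(sent):
        out = Sentence([w.upper() for w in sent.words])
        mark = sent.getmark()            # (tag, attrs) of the outermost span, if any
        if mark is not None:
            out.mark(mark[0], mark[1])   # keep the <seg id=...> mark on the result
        return out

    # Typical driver code, mirroring the __main__ block above:
    #   d = Dataset()
    #   d.read_raw('input.txt', 'doc1', 'myset', 'src', 'English')
    #   d.process(uppercase_processor, 'upper', 'English', srcsysid='src')
    #   d.write(sys.stdout, 'tstset', 'upper')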
diff --git a/scripts/training/cmert-0.5/log.py b/scripts/training/cmert-0.5/log.py
deleted file mode 100755
index bdf5da359..000000000
--- a/scripts/training/cmert-0.5/log.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/python2.3
-import sys
-
-level = 1
-file = sys.stderr
-
-def writeln(s=""):
- file.write("%s\n" % s)
- file.flush()
-
-def write(s):
- file.write(s)
- file.flush()
-
-
-
-
diff --git a/scripts/training/cmert-0.5/makeinitopt b/scripts/training/cmert-0.5/makeinitopt
deleted file mode 100755
index 5714b2482..000000000
--- a/scripts/training/cmert-0.5/makeinitopt
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/perl -w
-
-if ($#ARGV != 2) {
- die "usage: makeinitopt <ranges> <weightfile> <rangefile>"
-}
-$s = $ARGV[0];
-$woutput = $ARGV[1];
-$routput = $ARGV[2];
-open WOUT, ">$woutput" or die "couldn't open $woutput";
-open ROUT, ">$routput" or die "couldn't open $routput";
-
-@w = ();
-@lo = ();
-@hi = ();
-foreach $x (split(/;/, $s)) {
- if ($x =~ /(.*),(-?[\d.]+)-(-?[\d.]+)/) {
- push(@w, $1);
- push(@lo, $2);
- push(@hi, $3);
- } else {
- print STDERR "bad weight range: $x\n";
- }
-}
-
-print WOUT join(" ", @w), "\n";
-print ROUT join(" ", @lo), "\n";
-print ROUT join(" ", @hi), "\n";
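
The <ranges> argument parsed above is a semicolon-separated list of start-weight,low-high triples (run-cmert further down passes a concrete one). A rough Python rendering of the same parse, included only to make the format explicit; the regular expression mirrors the Perl one above and parse_ranges is a hypothetical name:

    import re, sys

    def parse_ranges(s):
        """Parse 'w,lo-hi;w,lo-hi;...' into parallel lists of start weights,
        lower bounds and upper bounds (all kept as strings, like the Perl)."""
        weights, lows, highs = [], [], []
        for x in s.split(';'):
            m = re.match(r'(.*),(-?[\d.]+)-(-?[\d.]+)$', x)
            if m:
                weights.append(m.group(1))
                lows.append(m.group(2))
                highs.append(m.group(3))
            else:
                sys.stderr.write("bad weight range: %s\n" % x)
        return weights, lows, highs

    # parse_ranges("0.2,0-1;0.2,-1-1") == (['0.2', '0.2'], ['0', '-1'], ['1', '1'])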
diff --git a/scripts/training/cmert-0.5/mert-driver b/scripts/training/cmert-0.5/mert-driver
deleted file mode 100755
index 07db0b3fc..000000000
--- a/scripts/training/cmert-0.5/mert-driver
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/sh
-
-WORKDIR=$1
-if [ ! -d $WORKDIR ]; then
- mkdir -p $WORKDIR
-fi
-RUNDIR=`pwd`
-
-SRCFILE=$2
-REFPREFIX=$3
-REFFILES=$REFPREFIX[0-9]*
-NBEST=$4
-DECODER=$5
-DECODEROPTS=$6
-RANGES=$7
-START=$8
-
-makeinitopt "$RANGES" $WORKDIR/weights.txt $WORKDIR/ranges.txt
-DIM=`cat $WORKDIR/weights.txt | awk '{print NF; exit}'`
-echo $DIM dimensions
-
-PATH=/group/project/statmt/pkoehn/user/abhishek:/group/project/statmt/pkoehn/user/abhishek/cmert-0.5:$PATH
-export PATH
-
-date
-
-echo Reference sets: $REFFILES
-
-if [ "x$START" == "x" ]; then
- START=1
-fi
-
-I=$START
-PREVLINECOUNT=0
-#$DECODEROPTS =~ s / \-f / -config /;
-#$DECODEROPTS =~ s/^\-f /-config /;
-filename=$WORKDIR/run$I.best$NBEST.out
-
-while true; do
- echo Run decoder
-
- WEIGHTS=`cat $WORKDIR/weights.txt`
-
- ###Changes - AA 29/11/05
- #echo "$DECODER $NBEST \"$WEIGHTS\" $WORKDIR/run$I \"$DECODEROPTS\" < $SRCFILE > $WORKDIR/run$I.nbest"
- #$DECODER $NBEST \"$WEIGHTS\" $WORKDIR/run$I \"$DECODEROPTS\" < $SRCFILE > $WORKDIR/run$I.nbest
-
- echo "$DECODER $DECODEROPTS \"$WEIGHTS\" -n-best-list $filename $NBEST < $SRCFILE > $WORKDIR/run$I.nbest"
- $DECODER $DECODEROPTS "$WEIGHTS" -n-best-list $filename $NBEST < $SRCFILE > $WORKDIR/run$I.nbest
-
- echo Calculate BLEU component scores
-
- sort -mn -t\| -k 1,1 $WORKDIR/run*.nbest | score-nbest.py $REFFILES $WORKDIR/
-
- #LINECOUNT=`cat $WORKDIR/feats.opt | awk '{n++} END {print n}'`
- LINECOUNT=`cat $WORKDIR/cands.opt | awk '{n += $2} END {print n}'`
- echo $LINECOUNT accumulated translations
- if [ $LINECOUNT -le $PREVLINECOUNT ]; then
- echo "Training finished"
- date
- break
- fi
-
- echo Optimize feature weights
-
- cd $WORKDIR
- cat ranges.txt weights.txt > init.opt
- rm -f weights.txt
- mert -d$DIM
- cd $RUNDIR
-
- if [ "x`cat $WORKDIR/weights.txt`" == "x" ]; then
- echo Optimization failed
- break
- fi
-
- I=`expr $I + 1`
- PREVLINECOUNT=$LINECOUNT
-
- date
-done
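
The driver above alternates decoding and weight optimization: decode with the current weights, merge the new n-best list into the pool of all previous ones, recompute BLEU component scores, and rerun mert until no new candidate translations accumulate. A compact sketch of that control flow; decode, score_nbest and optimize are hypothetical stand-ins for the external commands invoked by the shell script:

    def mert_driver(weights, decode, score_nbest, optimize, max_iters=100):
        """Outer MERT loop: stop once the pooled n-best list stops growing."""
        pooled = []                        # accumulated n-best candidates
        prev_size = 0
        for _ in range(max_iters):
            pooled += decode(weights)      # run$I.nbest for the current weights
            score_nbest(pooled)            # corresponds to score-nbest.py -> feats.opt/cands.opt
            if len(pooled) <= prev_size:   # the shell script reads this count from cands.opt
                break                      # no new translations -> converged
            prev_size = len(pooled)
            weights = optimize(pooled)     # corresponds to running mert -d$DIM on init.opt
        return weights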
diff --git a/scripts/training/cmert-0.5/mert.c b/scripts/training/cmert-0.5/mert.c
deleted file mode 100755
index 3ef762f6d..000000000
--- a/scripts/training/cmert-0.5/mert.c
+++ /dev/null
@@ -1,430 +0,0 @@
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "data.h"
-#include "point.h"
-#include "score.h"
-
-int verbose = 2;
-
-float min_interval = 1e-3;
-
-typedef struct {
- float x;
- int cand;
- int *delta_comps;
-} intersection_t;
-
-intersection_t *new_intersection(float x, int cand, int *comps1, int *comps2) {
- intersection_t *inter;
- int i;
- inter = malloc(sizeof(intersection_t));
- inter->x = x;
- inter->cand = cand; // this is not used but sometimes it's handy
- inter->delta_comps = malloc(comps_n * sizeof(int));
- for (i=0; i<comps_n; i++)
- inter->delta_comps[i] = comps1[i]-comps2[i];
- return inter;
-}
-
-void intersection_delete(intersection_t *inter) {
- free(inter->delta_comps);
- free(inter);
-}
-
-int compare_intersections(intersection_t **i1, intersection_t **i2) {
- if ((*i1)->x == (*i2)->x)
- return 0;
- else if ((*i1)->x < (*i2)->x)
- return -1;
- else
- return 1;
-}
-
-float slow_bleu(data_t *data, point_t *point) {
- int sent_i, cand_i, cand_n, i;
- candidate_t *cands;
- float p, best_p;
- int best;
- int *comps;
- float score;
- int ties, totalties;
-
- comps = calloc(comps_n, sizeof(int));
-
- totalties = 0;
-
- for (sent_i = 0; sent_i < data->sents_n; sent_i++) {
- cands = data->sents[sent_i];
- cand_n = data->cands_n[sent_i];
-
- ties = 0;
-
- best = 0;
- best_p = point_dotproduct(point, cands[0].features);
- for (cand_i = 1; cand_i < cand_n; cand_i++) {
- p = point_dotproduct(point, cands[cand_i].features);
- if (p > best_p) {
- best_p = p;
- best = cand_i;
- ties = 0;
- } else if (p == best_p) {
- ties++;
- }
- }
- totalties += ties;
- comps_addto(comps, cands[best].comps);
- }
- //point_print(point, stderr, 1);
- //fprintf(stderr, "\n");
- //fprintf(stderr, "slow bleu => %f\n", compute_score(comps));
- score = compute_score(comps);
- free(comps);
- return score;
-}
-
-/* Global optimization along a line (Och, 2004) */
-point_t *line_optimize(data_t *data, point_t *origin, point_t *dir) {
- int sent_i, cand_i, cand_n, intersection_i;
- candidate_t *cands;
- static intersection_t **intersections = NULL;
- intersection_t *inter;
- static int intersection_max;
- int intersection_n = 0;
- int prev, leftmost;
- float x, leftmost_x, prev_x, best_x;
- float score, best_score;
- int *comps;
- point_t *point;
- int first;
-
- if (!origin->has_score)
- point_set_score(origin, slow_bleu(data, origin));
-
- if (verbose >= 2) {
- fprintf(stderr, "starting point: ");
- point_print(origin, stderr, 1);
- fprintf(stderr, "\n direction: ");
- point_print(dir, stderr, 1);
- fprintf(stderr, "\n");
- }
-
- comps = calloc(comps_n, sizeof(int));
-
- if (intersections == NULL) {
- intersection_max = 10;
- intersections = malloc(intersection_max*sizeof(intersection_t *));
- }
-
- for (sent_i = 0; sent_i < data->sents_n; sent_i++) {
- cands = data->sents[sent_i];
- cand_n = data->cands_n[sent_i];
-
- if (verbose >= 3)
- fprintf(stderr, "sentence %d\n", sent_i);
-
- if (cand_n < 1)
- continue;
-
- /* calculate slopes and intercepts */
- for (cand_i = 0; cand_i < cand_n; cand_i++) {
- cands[cand_i].m = point_dotproduct(dir, cands[cand_i].features);
- cands[cand_i].b = point_dotproduct(origin, cands[cand_i].features);
- }
-
- /* find intersection points */
-
- /* find best candidate for x -> -inf */
- prev = -1;
- for (cand_i = 0; cand_i < cand_n; cand_i++)
- if (prev < 0 ||
- cands[cand_i].m < cands[prev].m ||
- cands[cand_i].m == cands[prev].m && cands[prev].b < cands[cand_i].b)
- prev = cand_i;
-
- if (verbose >= 3) {
- fprintf(stderr, "x->-inf cand %d\n", prev);
- }
-
- comps_addto(comps, cands[prev].comps);
-
- first = 1;
- while (1) {
- // find leftmost intersection
- leftmost = -1;
- for (cand_i = 0; cand_i < cand_n; cand_i++) {
- if (cands[prev].m == cands[cand_i].m) {
- if (cands[cand_i].b > cands[prev].b)
- fprintf(stderr, "two parallel lines and discarding the higher -- this shouldn't happen\n");
- continue; // no intersection
- }
-
- /* optimization: piecewise linear function must be concave up.
- Maybe it would still be faster to sort by slope beforehand */
- if (cands[cand_i].m < cands[prev].m)
- continue;
-
- x = -(cands[prev].b-cands[cand_i].b)/(cands[prev].m-cands[cand_i].m);
-
- if (leftmost < 0 || x < leftmost_x) {
- leftmost = cand_i;
- leftmost_x = x;
- }
- }
-
- if (leftmost < 0)
- break; // no more intersections
-
- /* Require that the intersection point be at least min_interval
- to the right of the previous one. If not, we replace the
- previous intersection point with this one. Yes, it can even
- happen that the new intersection point is slightly to the
- left of the old one, because of numerical imprecision. We
- don't check that the new point is also min_interval to the
- right of the penultimate one. In that case, the points would
- switch places in the sort, resulting in a bogus score for
- that interval. */
-
- if (first || leftmost_x - prev_x > min_interval) {
- if (intersection_n == intersection_max) {
- intersection_max *= 2;
- intersections = realloc(intersections, intersection_max*sizeof(intersection_t *));
- if (intersections == NULL)
- fprintf(stderr, "couldn't realloc intersections\n");
- }
- intersections[intersection_n++] = new_intersection(leftmost_x, leftmost, cands[leftmost].comps, cands[prev].comps);
- } else {
- // replace the old one
- inter = new_intersection(leftmost_x, leftmost, cands[leftmost].comps, cands[prev].comps);
- comps_addto(inter->delta_comps, intersections[intersection_n-1]->delta_comps);
- intersection_delete(intersections[intersection_n-1]);
- intersections[intersection_n-1] = inter;
- }
-
- if (verbose >= 3)
- fprintf(stderr, "found intersection point: %f, right cand %d\n", leftmost_x, leftmost);
- prev = leftmost;
- prev_x = leftmost_x;
- first = 0;
- }
- }
-
- best_score = compute_score(comps);
- //fprintf(stderr, "x->-inf => %f\n", best_score);
-
- if (intersection_n == 0)
- best_x = 0.0;
- else {
- qsort(intersections, intersection_n, sizeof(intersection_t *), compare_intersections);
- best_x = intersections[0]->x - 1000.0; // whatever
- }
- for (intersection_i = 0; intersection_i < intersection_n; intersection_i++) {
- comps_addto(comps, intersections[intersection_i]->delta_comps);
- score = compute_score(comps);
- //fprintf(stderr, "x=%f => %f\n", intersections[intersection_i]->x, score);
- if (score > best_score) {
- best_score = score;
- if (intersection_i+1 < intersection_n)
- // what if interval is zero-width?
- best_x = 0.5*(intersections[intersection_i]->x + intersections[intersection_i+1]->x);
- else
- best_x = intersections[intersection_i]->x + 0.1; // whatever
- }
- }
- //fprintf(stderr, "best_x = %f\n", best_x);
- point = point_copy(dir);
- point_multiplyby(point, best_x);
- point_addto(point, origin);
- point_set_score(point, best_score);
-
- if (verbose >= 2) {
- fprintf(stderr, " ending point: ");
- point_print(point, stderr, 1);
- fprintf(stderr, "\n");
- //check_comps(data, point, comps);
- }
-
- for (intersection_i = 0; intersection_i < intersection_n; intersection_i++)
- intersection_delete(intersections[intersection_i]);
- free(comps);
-
- if (best_score < origin->score) {
- /* this can happen in the case of a tie between two candidates with different bleu component scores. just trash the point and return the starting point */
- point_delete(point);
- return point_copy(origin);
- }
-
- return point;
-}
-
-point_t *optimize_powell(data_t *data, point_t *point) {
- int i;
- point_t **u, **p;
- float biggestwin, totalwin, extrapolatedwin;
- int biggestwin_i;
- point_t *point_e;
-
- u = malloc(dim*sizeof(point_t *));
- p = malloc(dim*sizeof(point_t *));
-
- point = point_copy(point);
- if (!point->has_score)
- point_set_score(point, slow_bleu(data, point));
-
- for (i=0; i<dim; i++) {
- u[i] = new_point();
- u[i]->weights[i] = 1.0;
- }
-
- while (1) {
- p[0] = line_optimize(data, point, u[0]);
- biggestwin_i = 0;
- biggestwin = p[0]->score - point->score;
- for (i=1; i<dim; i++) {
- p[i] = line_optimize(data, p[i-1], u[i]);
- if (p[i]->score - p[i-1]->score > biggestwin) {
- biggestwin_i = i;
- biggestwin = p[i]->score - p[i-1]->score;
- }
- }
-
- totalwin = p[dim-1]->score - point->score;
-
- if (totalwin < 0.000001)
- break;
-
- // last point minus first point
- point_multiplyby(point, -1.0);
- point_addto(point, p[dim-1]);
-
- point_e = point_copy(point);
- point_addto(point_e, p[dim-1]);
- point_set_score(point_e, slow_bleu(data, point_e));
- extrapolatedwin = point_e->score - point->score; // point->score is the original point
-
- if (extrapolatedwin > 0 &&
- 2*(2*totalwin - extrapolatedwin) *
- powf(totalwin - biggestwin, 2.0) <
- powf(extrapolatedwin, 2.0)*biggestwin) {
- // replace dominant direction vector with sum vector
- point_delete(u[biggestwin_i]);
- point_normalize(point);
- u[biggestwin_i] = point;
- }
-
- point_delete(point_e);
-
- // optimization continues with last point
- point = p[dim-1];
-
- for (i=0; i<dim-1; i++)
- if (i != biggestwin_i)
- point_delete(p[i]);
- }
-
- for (i=0; i<dim; i++)
- point_delete(u[i]);
-
- free(u);
- free(p);
-
- point_normalize(point);
- return point;
-}
-
-point_t *optimize_koehn(data_t *data, point_t *point) {
- point_t *dir, **newpoints;
- int dir_i;
- int best_dir = -1;
- dir = new_point();
- newpoints = malloc(dim*sizeof(point_t *));
-
- point = point_copy(point);
-
- while (1) {
- for (dir_i = 0; dir_i < dim; dir_i++) {
- dir->weights[dir_i] = 1.0;
- newpoints[dir_i] = line_optimize(data, point, dir);
- if (best_dir < 0 || newpoints[dir_i]->score > newpoints[best_dir]->score)
- best_dir = dir_i;
- dir->weights[dir_i] = 0.0;
- }
- if (point->has_score && newpoints[best_dir]->score - point->score < 0.000001)
- break;
-
- point_delete(point);
- point = newpoints[best_dir];
-
- // discard the other points
- for (dir_i = 0; dir_i < dim; dir_i++)
- if (dir_i != best_dir)
- point_delete(newpoints[dir_i]);
- }
-
- point_delete(dir);
- free(newpoints);
-
- point_normalize(point);
- return point;
-}
-
-void usage(void) {
- fprintf(stderr, "usage: mert -d <dimensions>\n");
- exit(1);
-}
-
-int main (int argc, char **argv) {
- int point_i;
- int points_n = 20;
- point_t *min, *max;
- data_t *data;
- point_t *bestpoint, *newpoint, *startpoint;
- int i, c;
- FILE *fp;
-
- while ((c = getopt(argc, argv, "d:n:")) != -1) {
- switch (c) {
- case 'd':
- dim = strtol(optarg, NULL, 10);
- break;
- case 'n':
- points_n = strtol(optarg, NULL, 10);
- break;
- default:
- usage();
- }
- }
- argc -= optind;
- argv += optind;
-
- if (dim < 0)
- usage();
-
- if ((data = read_data()) == NULL) exit(1);
-
- fp = fopen("init.opt", "r");
- if ((min = read_point(fp)) == NULL) exit(1);
- if ((max = read_point(fp)) == NULL) exit(1);
- if ((startpoint = read_point(fp)) == NULL) exit(1);
- fclose(fp);
-
- bestpoint = NULL;
- for (point_i=0; point_i<points_n; point_i++) {
- fprintf(stderr, "*** point %d ***\n", point_i);
- if (point_i == 0)
- newpoint = startpoint;
- else
- newpoint = random_point(min, max);
- newpoint = optimize_koehn(data, newpoint);
- if (bestpoint == NULL || newpoint->score > bestpoint->score)
- bestpoint = newpoint; // who cares about the leak
- }
- fprintf(stderr, "Best point: ");
- point_print(bestpoint, stderr, 1);
- fprintf(stderr, "\n");
-
- fp = fopen("weights.txt", "w");
- point_print(bestpoint, fp, 0);
- fprintf(fp, "\n");
- fclose(fp);
-}
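
line_optimize() above implements the exact line search cited in its comment (Och, 2004): along a search direction, every candidate's model score is a line m*x + b, the best candidate as a function of x is the upper envelope of those lines, and the error metric can only change at the envelope's intersection points, so BLEU components are accumulated once per interval instead of being recomputed from scratch. A small single-sentence illustration of the envelope sweep, written as a Python sketch rather than with the C data structures above:

    def envelope_intervals(cands):
        """cands: list of (m, b, comps) with float slope m and intercept b.
        Returns [(x_boundary, comps_of_candidate_winning_right_of_boundary)],
        mirroring the sweep in line_optimize()."""
        # winner at x -> -inf: smallest slope, ties broken by larger intercept
        prev = min(range(len(cands)), key=lambda i: (cands[i][0], -cands[i][1]))
        boundaries = []
        while True:
            best_x, best_i = None, None
            for i, (m, b, _) in enumerate(cands):
                if m <= cands[prev][0]:
                    continue              # parallel or flatter: never overtakes prev
                x = -(cands[prev][1] - b) / (cands[prev][0] - m)
                if best_x is None or x < best_x:
                    best_x, best_i = x, i
            if best_i is None:
                return boundaries         # prev wins all the way to +inf
            boundaries.append((best_x, cands[best_i][2]))
            prev = best_i

line_optimize() does this for every sentence, pools the component deltas at all boundaries, and finally takes the midpoint of the best-scoring interval; optimize_koehn() and optimize_powell() then repeat that line search along coordinate or conjugate directions.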
diff --git a/scripts/training/cmert-0.5/point.c b/scripts/training/cmert-0.5/point.c
deleted file mode 100755
index 3fdb6f697..000000000
--- a/scripts/training/cmert-0.5/point.c
+++ /dev/null
@@ -1,116 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-#include "point.h"
-
-int dim = -1;
-
-point_t *new_point() {
- point_t *point;
- point = malloc(sizeof(point_t));
- point->score = 0.0;
- point->weights = calloc(dim, sizeof(float));
- point->has_score = 0;
- return point;
-}
-
-void point_set_score(point_t *point, float score) {
- point->has_score = 1;
- point->score = score;
-}
-
-void point_delete(point_t *point) {
- free(point->weights);
- free(point);
-}
-
-point_t *random_point(point_t *min, point_t *max) {
- int i;
- point_t *point = new_point();
- for (i=0; i<dim; i++)
- point->weights[i] = min->weights[i] + (float)random()/RAND_MAX * (max->weights[i]-min->weights[i]);
- return point;
-}
-
-point_t *point_copy(point_t *point) {
- point_t *newpoint;
- int i;
- newpoint = new_point();
- newpoint->score = point->score;
- newpoint->has_score = point->has_score;
- for (i=0; i<dim; i++)
- newpoint->weights[i] = point->weights[i];
- return newpoint;
-}
-
-float point_dotproduct(point_t *point, float *y) {
- float result;
- int i;
- result = 0.0;
- for (i=0; i<dim; i++)
- result += point->weights[i] * y[i];
- return result;
-}
-
-/* Destructive operations */
-void point_multiplyby(point_t *point, float k) {
- int i;
- for (i=0; i<dim; i++)
- point->weights[i] *= k;
-}
-
-void point_addto(point_t *point1, point_t *point2) {
- int i;
- for (i=0; i<dim; i++)
- point1->weights[i] += point2->weights[i];
-}
-
-void point_normalize(point_t *point) {
- int i;
- float norm = 0.0;
- for (i=0; i<dim; i++)
- //norm += point->weights[i] * point->weights[i];
- norm += fabs(point->weights[i]);
- // norm = sqrt(norm);
- for (i=0; i<dim; i++)
- point->weights[i] /= norm;
-}
-
-void point_print(point_t *point, FILE *fp, int with_score) {
- int i;
- fprintf(fp, "%f", point->weights[0]);
- for (i=1; i<dim; i++)
- fprintf(fp, " %f", point->weights[i]);
- if (point->has_score && with_score)
- fprintf(fp, " => %f", point->score);
-}
-
-point_t *read_point(FILE *fp) {
- static char buf[1000];
- char *tok, *s;
- int field;
- point_t *point;
-
- point = new_point();
-
- fgets(buf, sizeof(buf), fp);
- s = buf;
- field = 0;
- while ((tok = strsep(&s, " \t\n")) != NULL) {
- if (!*tok) // empty token
- continue;
- if (field >= dim) {
- fprintf(stderr, "read_point(): too many fields in line\n");
- return NULL;
- } else
- point->weights[field] = strtod(tok, NULL);
- field++;
- }
- if (field < dim) {
- fprintf(stderr, "read_point(): wrong number of fields in line\n");
- return NULL;
- }
- return point;
-}
diff --git a/scripts/training/cmert-0.5/point.h b/scripts/training/cmert-0.5/point.h
deleted file mode 100755
index 67a8386f7..000000000
--- a/scripts/training/cmert-0.5/point.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef POINT_H
-#define POINT_H
-
-typedef struct {
- float *weights;
- int has_score;
- float score;
-} point_t;
-
-extern int dim;
-
-point_t *new_point();
-void point_set_score(point_t *point, float score);
-void point_delete(point_t *point);
-point_t *point_copy(point_t *point);
-point_t *random_point(point_t *min, point_t *max);
-float point_dotproduct(point_t *point, float *y);
-void point_multiplyby(point_t *point, float k);
-void point_normalize(point_t *point);
-void point_addto(point_t *point1, point_t *point2);
-#include <stdio.h>
-point_t *read_point(FILE *fp);
-void point_print(point_t *point, FILE *fp, int with_score);
-
-#endif
diff --git a/scripts/training/cmert-0.5/python/psyco/__init__.py b/scripts/training/cmert-0.5/python/psyco/__init__.py
deleted file mode 100644
index 23a12495a..000000000
--- a/scripts/training/cmert-0.5/python/psyco/__init__.py
+++ /dev/null
@@ -1,57 +0,0 @@
-###########################################################################
-#
-# Psyco top-level file of the Psyco package.
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco -- the Python Specializing Compiler.
-
-Typical usage: add the following lines to your application's main module:
-
-try:
- import psyco
- psyco.profile()
-except:
- print 'Psyco not found, ignoring it'
-"""
-###########################################################################
-
-
-#
-# This module is present to make 'psyco' a package and to
-# publish the main functions and variables.
-#
-# More documentation can be found in core.py.
-#
-
-
-# Try to import the dynamic-loading _psyco and report errors
-try:
- import _psyco
-except ImportError, e:
- extramsg = ''
- import sys, imp
- try:
- file, filename, (suffix, mode, type) = imp.find_module('_psyco', __path__)
- except ImportError:
- ext = [suffix for suffix, mode, type in imp.get_suffixes()
- if type == imp.C_EXTENSION]
- if ext:
- extramsg = (" (cannot locate the compiled extension '_psyco%s' "
- "in the package path '%s')" % (ext[0], '; '.join(__path__)))
- else:
- extramsg = (" (check that the compiled extension '%s' is for "
- "the correct Python version; this is Python %s)" %
- (filename, sys.version.split()[0]))
- raise ImportError, str(e) + extramsg
-
-# Publish important data by importing them in the package
-from support import __version__, error, warning, _getrealframe, _getemulframe
-from support import version_info, __version__ as hexversion
-from core import full, profile, background, runonly, stop, cannotcompile
-from core import log, bind, unbind, proxy, unproxy, dumpcodebuf
-from _psyco import setfilter
-
-try:
- from _psyco import compact, compacttype # Python 2.2 and above only
-except ImportError:
- pass
diff --git a/scripts/training/cmert-0.5/python/psyco/_psyco.so b/scripts/training/cmert-0.5/python/psyco/_psyco.so
deleted file mode 100755
index f7c8fc883..000000000
--- a/scripts/training/cmert-0.5/python/psyco/_psyco.so
+++ /dev/null
Binary files differ
diff --git a/scripts/training/cmert-0.5/python/psyco/classes.py b/scripts/training/cmert-0.5/python/psyco/classes.py
deleted file mode 100644
index 11c2d169b..000000000
--- a/scripts/training/cmert-0.5/python/psyco/classes.py
+++ /dev/null
@@ -1,53 +0,0 @@
-###########################################################################
-#
-# Psyco class support module.
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco class support module.
-
-'psyco.classes.psyobj' is an alternate Psyco-optimized root for classes.
-Any class inheriting from it or using the metaclass '__metaclass__' might
-get optimized specifically for Psyco. It is equivalent to calling
-psyco.bind() on the class object after its creation.
-
-Note that this module has no effect with Python version 2.1 or earlier.
-
-Importing everything from psyco.classes in a module will import the
-'__metaclass__' name, so all classes defined after a
-
- from psyco.classes import *
-
-will automatically use the Psyco-optimized metaclass.
-"""
-###########################################################################
-
-__all__ = ['psyobj', 'psymetaclass', '__metaclass__']
-
-
-# Python version check
-try:
- from _psyco import compacttype
-except ImportError:
- class psyobj: # compatibility
- pass
- psymetaclass = None
-else:
- # version >= 2.2 only
-
- import core
- from types import FunctionType
-
- class psymetaclass(compacttype):
- "Psyco-optimized meta-class. Turns all methods into Psyco proxies."
-
- def __new__(cls, name, bases, dict):
- bindlist = dict.get('__psyco__bind__')
- if bindlist is None:
- bindlist = [key for key, value in dict.items()
- if isinstance(value, FunctionType)]
- for attr in bindlist:
- dict[attr] = core.proxy(dict[attr])
- return super(psymetaclass, cls).__new__(cls, name, bases, dict)
-
- psyobj = psymetaclass("psyobj", (), {})
-__metaclass__ = psymetaclass
diff --git a/scripts/training/cmert-0.5/python/psyco/core.py b/scripts/training/cmert-0.5/python/psyco/core.py
deleted file mode 100644
index 42a6c2fb3..000000000
--- a/scripts/training/cmert-0.5/python/psyco/core.py
+++ /dev/null
@@ -1,232 +0,0 @@
-###########################################################################
-#
-# Psyco main functions.
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco main functions.
-
-Here are the routines that you can use from your applications.
-These are mostly interfaces to the C core, but they depend on
-the Python version.
-
-You can use these functions from the 'psyco' module instead of
-'psyco.core', e.g.
-
- import psyco
- psyco.log('/tmp/psyco.log')
- psyco.profile()
-"""
-###########################################################################
-
-import _psyco
-import types, new
-from support import *
-
-
-# Default charge profiler values
-default_watermark = 0.09 # between 0.0 (0%) and 1.0 (100%)
-default_halflife = 0.5 # seconds
-default_pollfreq_profile = 20 # Hz
-default_pollfreq_background = 100 # Hz -- a maximum for sleep's resolution
-default_parentframe = 0.25 # should not be more than 0.5 (50%)
-
-
-def full(memory=None, time=None, memorymax=None, timemax=None):
- """Compile as much as possible.
-
-Typical use is for small scripts performing intensive computations
-or string handling."""
- import profiler
- if PYTHON_SUPPORT:
- p = profiler.FullCompiler()
- else:
- p = profiler.ActiveProfiler(0.0, 0.5)
- p.run(memory, time, memorymax, timemax)
-
-
-def profile(watermark = default_watermark,
- halflife = default_halflife,
- pollfreq = default_pollfreq_profile,
- parentframe = default_parentframe,
- memory=None, time=None, memorymax=None, timemax=None):
- """Turn on profiling.
-
-The 'watermark' parameter controls how easily running functions will
-be compiled. The smaller the value, the more functions are compiled."""
- import profiler
- p = profiler.ActivePassiveProfiler(watermark, halflife,
- pollfreq, parentframe)
- p.run(memory, time, memorymax, timemax)
-
-
-def background(watermark = default_watermark,
- halflife = default_halflife,
- pollfreq = default_pollfreq_background,
- parentframe = default_parentframe,
- memory=None, time=None, memorymax=None, timemax=None):
- """Turn on passive profiling.
-
-This is a very lightweight mode in which only intensively computing
-functions can be detected. The smaller the 'watermark', the more functions
-are compiled."""
- import profiler
- p = profiler.PassiveProfiler(watermark, halflife, pollfreq, parentframe)
- p.run(memory, time, memorymax, timemax)
-
-
-def runonly(memory=None, time=None, memorymax=None, timemax=None):
- """Nonprofiler.
-
-XXX check if this is useful and document."""
- if PYTHON_SUPPORT:
- import profiler
- p = profiler.RunOnly()
- p.run(memory, time, memorymax, timemax)
-
-
-def stop():
- """Turn off all automatic compilation. bind() calls remain in effect."""
- import profiler
- profiler.go([])
-
-
-def log(logfile='', mode='w', top=10):
- """Enable logging to the given file.
-
-If the file name is unspecified, a default name is built by appending
-a 'log-psyco' extension to the main script name.
-
-Mode is 'a' to append to a possibly existing file or 'w' to overwrite
-an existing file. Note that the log file may grow quickly in 'a' mode."""
- import profiler, logger
- if not logfile:
- import os
- logfile, dummy = os.path.splitext(sys.argv[0])
- if os.path.basename(logfile):
- logfile += '.'
- logfile += 'log-psyco'
- if hasattr(_psyco, 'VERBOSE_LEVEL'):
- print >> sys.stderr, 'psyco: logging to', logfile
- # logger.current should be a real file object; subtle problems
- # will show up if its write() and flush() methods are written
- # in Python, as Psyco will invoke them while compiling.
- logger.current = open(logfile, mode)
- logger.print_charges = top
- profiler.logger = logger
- logger.writedate('Logging started')
- cannotcompile(logger.psycowrite)
- _psyco.statwrite(logger=logger.psycowrite)
-
-
-def bind(x, rec=None):
- """Enable compilation of the given function, method, or class object.
-
-If C is a class (or anything with a '__dict__' attribute), bind(C) will
-rebind all functions and methods found in C.__dict__ (which means, for
-classes, all methods defined in the class but not in its parents).
-
-The optional second argument specifies the number of recursive
-compilation levels: all functions called by func are compiled
-up to the given depth of indirection."""
- if isinstance(x, types.MethodType):
- x = x.im_func
- if isinstance(x, types.FunctionType):
- if rec is None:
- x.func_code = _psyco.proxycode(x)
- else:
- x.func_code = _psyco.proxycode(x, rec)
- return
- if hasattr(x, '__dict__'):
- funcs = [o for o in x.__dict__.values()
- if isinstance(o, types.MethodType)
- or isinstance(o, types.FunctionType)]
- if not funcs:
- raise error, ("nothing bindable found in %s object" %
- type(x).__name__)
- for o in funcs:
- bind(o, rec)
- return
- raise TypeError, "cannot bind %s objects" % type(x).__name__
-
-
-def unbind(x):
- """Reverse of bind()."""
- if isinstance(x, types.MethodType):
- x = x.im_func
- if isinstance(x, types.FunctionType):
- try:
- f = _psyco.unproxycode(x.func_code)
- except error:
- pass
- else:
- x.func_code = f.func_code
- return
- if hasattr(x, '__dict__'):
- for o in x.__dict__.values():
- if (isinstance(o, types.MethodType)
- or isinstance(o, types.FunctionType)):
- unbind(o)
- return
- raise TypeError, "cannot unbind %s objects" % type(x).__name__
-
-
-def proxy(x, rec=None):
- """Return a Psyco-enabled copy of the function.
-
-The original function is still available for non-compiled calls.
-The optional second argument specifies the number of recursive
-compilation levels: all functions called by func are compiled
-up to the given depth of indirection."""
- if isinstance(x, types.FunctionType):
- if rec is None:
- code = _psyco.proxycode(x)
- else:
- code = _psyco.proxycode(x, rec)
- return new.function(code, x.func_globals, x.func_name)
- if isinstance(x, types.MethodType):
- p = proxy(x.im_func, rec)
- return new.instancemethod(p, x.im_self, x.im_class)
- raise TypeError, "cannot proxy %s objects" % type(x).__name__
-
-
-def unproxy(proxy):
- """Return a new copy of the original function of method behind a proxy.
-The result behaves like the original function in that calling it
-does not trigger compilation or execution of any compiled code."""
- if isinstance(proxy, types.FunctionType):
- return _psyco.unproxycode(proxy.func_code)
- if isinstance(proxy, types.MethodType):
- f = unproxy(proxy.im_func)
- return new.instancemethod(f, proxy.im_self, proxy.im_class)
- raise TypeError, "%s objects cannot be proxies" % type(proxy).__name__
-
-
-def cannotcompile(x):
- """Instruct Psyco never to compile the given function, method
-or code object."""
- if isinstance(x, types.MethodType):
- x = x.im_func
- if isinstance(x, types.FunctionType):
- x = x.func_code
- if isinstance(x, types.CodeType):
- _psyco.cannotcompile(x)
- else:
- raise TypeError, "unexpected %s object" % type(x).__name__
-
-
-def dumpcodebuf():
- """Write in file psyco.dump a copy of the emitted machine code,
-provided Psyco was compiled with a non-zero CODE_DUMP.
-See py-utils/httpxam.py to examine psyco.dump."""
- if hasattr(_psyco, 'dumpcodebuf'):
- _psyco.dumpcodebuf()
-
-
-###########################################################################
-# Psyco variables
-# error * the error raised by Psyco
-# warning * the warning raised by Psyco
-# __in_psyco__ * a new built-in variable which is always zero, but which
-# Psyco special-cases by returning 1 instead. So
-# __in_psyco__ can be used in a function to know if
-# that function is being executed by Psyco or not.
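
The docstrings above are the public interface: bind() rewrites a function's func_code in place, proxy() leaves the original untouched and returns a compiled copy, and log() enables the logger used by the profilers. A short usage sketch assembled only from those docstrings (inner_loop and the log path are illustrative):

    import psyco

    def inner_loop(n):
        total = 0
        for i in range(n):
            total += i * i
        return total

    psyco.log('/tmp/psyco.log')        # optional: log compilation activity
    psyco.bind(inner_loop)             # compile inner_loop in place
    # alternatively, keep the original and call a compiled copy:
    # fast_loop = psyco.proxy(inner_loop)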
diff --git a/scripts/training/cmert-0.5/python/psyco/kdictproxy.py b/scripts/training/cmert-0.5/python/psyco/kdictproxy.py
deleted file mode 100644
index 710677b00..000000000
--- a/scripts/training/cmert-0.5/python/psyco/kdictproxy.py
+++ /dev/null
@@ -1,133 +0,0 @@
-###########################################################################
-#
-# Support code for the 'psyco.compact' type.
-
-from __future__ import generators
-
-try:
- from UserDict import DictMixin
-except ImportError:
-
- # backported from Python 2.3 to Python 2.2
- class DictMixin:
- # Mixin defining all dictionary methods for classes that already have
- # a minimum dictionary interface including getitem, setitem, delitem,
- # and keys. Without knowledge of the subclass constructor, the mixin
- # does not define __init__() or copy(). In addition to the four base
- # methods, progressively more efficiency comes with defining
- # __contains__(), __iter__(), and iteritems().
-
- # second level definitions support higher levels
- def __iter__(self):
- for k in self.keys():
- yield k
- def has_key(self, key):
- try:
- value = self[key]
- except KeyError:
- return False
- return True
- def __contains__(self, key):
- return self.has_key(key)
-
- # third level takes advantage of second level definitions
- def iteritems(self):
- for k in self:
- yield (k, self[k])
- def iterkeys(self):
- return self.__iter__()
-
- # fourth level uses definitions from lower levels
- def itervalues(self):
- for _, v in self.iteritems():
- yield v
- def values(self):
- return [v for _, v in self.iteritems()]
- def items(self):
- return list(self.iteritems())
- def clear(self):
- for key in self.keys():
- del self[key]
- def setdefault(self, key, default):
- try:
- return self[key]
- except KeyError:
- self[key] = default
- return default
- def pop(self, key, *args):
- if len(args) > 1:
- raise TypeError, "pop expected at most 2 arguments, got "\
- + repr(1 + len(args))
- try:
- value = self[key]
- except KeyError:
- if args:
- return args[0]
- raise
- del self[key]
- return value
- def popitem(self):
- try:
- k, v = self.iteritems().next()
- except StopIteration:
- raise KeyError, 'container is empty'
- del self[k]
- return (k, v)
- def update(self, other):
- # Make progressively weaker assumptions about "other"
- if hasattr(other, 'iteritems'): # iteritems saves memory and lookups
- for k, v in other.iteritems():
- self[k] = v
- elif hasattr(other, '__iter__'): # iter saves memory
- for k in other:
- self[k] = other[k]
- else:
- for k in other.keys():
- self[k] = other[k]
- def get(self, key, default=None):
- try:
- return self[key]
- except KeyError:
- return default
- def __repr__(self):
- return repr(dict(self.iteritems()))
- def __cmp__(self, other):
- if other is None:
- return 1
- if isinstance(other, DictMixin):
- other = dict(other.iteritems())
- return cmp(dict(self.iteritems()), other)
- def __len__(self):
- return len(self.keys())
-
-###########################################################################
-
-from _psyco import compact # Python 2.2 and above only
-
-
-class compactdictproxy(DictMixin):
-
- def __init__(self, ko):
- self._ko = ko # compact object of which 'self' is the dict
-
- def __getitem__(self, key):
- return compact.__getslot__(self._ko, key)
-
- def __setitem__(self, key, value):
- compact.__setslot__(self._ko, key, value)
-
- def __delitem__(self, key):
- compact.__delslot__(self._ko, key)
-
- def keys(self):
- return compact.__members__.__get__(self._ko)
-
- def clear(self):
- keys = self.keys()
- keys.reverse()
- for key in keys:
- del self[key]
-
- def __repr__(self):
- keys = ', '.join(self.keys())
- return '<compactdictproxy object {%s}>' % (keys,)
diff --git a/scripts/training/cmert-0.5/python/psyco/logger.py b/scripts/training/cmert-0.5/python/psyco/logger.py
deleted file mode 100644
index 33cb90a4e..000000000
--- a/scripts/training/cmert-0.5/python/psyco/logger.py
+++ /dev/null
@@ -1,90 +0,0 @@
-###########################################################################
-#
-# Psyco logger.
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco logger.
-
-See log() in core.py.
-"""
-###########################################################################
-
-
-import _psyco
-from time import time, localtime, strftime
-
-
-current = None
-print_charges = 10
-dump_delay = 0.2
-dump_last = 0.0
-
-def write(s, level):
- t = time()
- f = t-int(t)
- current.write("%s.%02d %-*s%s\n" % (
- strftime("%X", localtime(int(t))),
- int(f*100.0), 63-level, s,
- "%"*level))
- current.flush()
-
-def psycowrite(s):
- t = time()
- f = t-int(t)
- current.write("%s.%02d %-*s%s\n" % (
- strftime("%X", localtime(int(t))),
- int(f*100.0), 60, s.strip(),
- "% %"))
- current.flush()
-
-##def writelines(lines, level=0):
-## if lines:
-## t = time()
-## f = t-int(t)
-## timedesc = strftime("%x %X", localtime(int(t)))
-## print >> current, "%s.%03d %-*s %s" % (
-## timedesc, int(f*1000),
-## 50-level, lines[0],
-## "+"*level)
-## timedesc = " " * (len(timedesc)+5)
-## for line in lines[1:]:
-## print >> current, timedesc, line
-
-def writememory():
- write("memory usage: %d+ kb" % _psyco.memory(), 1)
-
-def dumpcharges():
- global dump_last
- if print_charges:
- t = time()
- if not (dump_last <= t < dump_last+dump_delay):
- if t <= dump_last+1.5*dump_delay:
- dump_last += dump_delay
- else:
- dump_last = t
- #write("%s: charges:" % who, 0)
- lst = _psyco.stattop(print_charges)
- if lst:
- f = t-int(t)
- lines = ["%s.%02d ______\n" % (
- strftime("%X", localtime(int(t))),
- int(f*100.0))]
- i = 1
- for co, charge in lst:
- detail = co.co_filename
- if len(detail) > 19:
- detail = '...' + detail[-17:]
- lines.append(" #%-3d |%4.1f %%| %-26s%20s:%d\n" %
- (i, charge*100.0, co.co_name, detail,
- co.co_firstlineno))
- i += 1
- current.writelines(lines)
- current.flush()
-
-def writefinalstats():
- dumpcharges()
- writememory()
- writedate("program exit")
-
-def writedate(msg):
- write('%s, %s' % (msg, strftime("%x")), 20)
diff --git a/scripts/training/cmert-0.5/python/psyco/profiler.py b/scripts/training/cmert-0.5/python/psyco/profiler.py
deleted file mode 100644
index ef7bf8e59..000000000
--- a/scripts/training/cmert-0.5/python/psyco/profiler.py
+++ /dev/null
@@ -1,388 +0,0 @@
-###########################################################################
-#
-# Psyco profiler (Python part).
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco profiler (Python part).
-
-The implementation of the non-time-critical parts of the profiler.
-See profile() and full() in core.py for the easy interface.
-"""
-###########################################################################
-
-import _psyco
-from support import *
-import math, time, types, atexit
-now = time.time
-try:
- import thread
-except ImportError:
- import dummy_thread as thread
-
-
-# current profiler instance
-current = None
-
-# enabled profilers, in order of priority
-profilers = []
-
-# logger module (when enabled by core.log())
-logger = None
-
-# a lock for a thread-safe go()
-go_lock = thread.allocate_lock()
-
-def go(stop=0):
- # run the highest-priority profiler in 'profilers'
- global current
- go_lock.acquire()
- try:
- prev = current
- if stop:
- del profilers[:]
- if prev:
- if profilers and profilers[0] is prev:
- return # best profiler already running
- prev.stop()
- current = None
- for p in profilers[:]:
- if p.start():
- current = p
- if logger: # and p is not prev:
- logger.write("%s: starting" % p.__class__.__name__, 5)
- return
- finally:
- go_lock.release()
- # no profiler is running now
- if stop:
- if logger:
- logger.writefinalstats()
- else:
- tag2bind()
-
-atexit.register(go, 1)
-
-
-def buildfncache(globals, cache):
- if hasattr(types.IntType, '__dict__'):
- clstypes = (types.ClassType, types.TypeType)
- else:
- clstypes = types.ClassType
- for x in globals.values():
- if isinstance(x, types.MethodType):
- x = x.im_func
- if isinstance(x, types.FunctionType):
- cache[x.func_code] = x, ''
- elif isinstance(x, clstypes):
- for y in x.__dict__.values():
- if isinstance(y, types.MethodType):
- y = y.im_func
- if isinstance(y, types.FunctionType):
- cache[y.func_code] = y, x.__name__
-
-# code-to-function mapping (cache)
-function_cache = {}
-
-def trytobind(co, globals, log=1):
- try:
- f, clsname = function_cache[co]
- except KeyError:
- buildfncache(globals, function_cache)
- try:
- f, clsname = function_cache[co]
- except KeyError:
- if logger:
- logger.write('warning: cannot find function %s in %s' %
- (co.co_name, globals.get('__name__', '?')), 3)
- return # give up
- if logger and log:
- modulename = globals.get('__name__', '?')
- if clsname:
- modulename += '.' + clsname
- logger.write('bind function: %s.%s' % (modulename, co.co_name), 1)
- f.func_code = _psyco.proxycode(f)
-
-
-if PYTHON_SUPPORT:
- # the list of code objects that have been tagged
- tagged_codes = []
-
- def tag(co, globals):
- if logger:
- try:
- f, clsname = function_cache[co]
- except KeyError:
- buildfncache(globals, function_cache)
- try:
- f, clsname = function_cache[co]
- except KeyError:
- clsname = '' # give up
- modulename = globals.get('__name__', '?')
- if clsname:
- modulename += '.' + clsname
- logger.write('tag function: %s.%s' % (modulename, co.co_name), 1)
- tagged_codes.append((co, globals))
- _psyco.turbo_frame(co)
- _psyco.turbo_code(co)
-
- def tag2bind():
- if tagged_codes:
- if logger:
- logger.write('profiling stopped, binding %d functions' %
- len(tagged_codes), 2)
- for co, globals in tagged_codes:
- trytobind(co, globals, 0)
- function_cache.clear()
- del tagged_codes[:]
-
-else:
- # tagging is impossible, always bind
- tag = trytobind
- def tag2bind():
- pass
-
-
-
-class Profiler:
- MemoryTimerResolution = 0.103
-
- def run(self, memory, time, memorymax, timemax):
- self.memory = memory
- self.memorymax = memorymax
- self.time = time
- if timemax is None:
- self.endtime = None
- else:
- self.endtime = now() + timemax
- self.alarms = []
- profilers.append(self)
- go()
-
- def start(self):
- curmem = _psyco.memory()
- memlimits = []
- if self.memorymax is not None:
- if curmem >= self.memorymax:
- if logger:
- logger.writememory()
- return self.limitreached('memorymax')
- memlimits.append(self.memorymax)
- if self.memory is not None:
- if self.memory <= 0:
- if logger:
- logger.writememory()
- return self.limitreached('memory')
- memlimits.append(curmem + self.memory)
- self.memory_at_start = curmem
-
- curtime = now()
- timelimits = []
- if self.endtime is not None:
- if curtime >= self.endtime:
- return self.limitreached('timemax')
- timelimits.append(self.endtime - curtime)
- if self.time is not None:
- if self.time <= 0.0:
- return self.limitreached('time')
- timelimits.append(self.time)
- self.time_at_start = curtime
-
- try:
- self.do_start()
- except error, e:
- if logger:
- logger.write('%s: disabled by psyco.error:' % (
- self.__class__.__name__), 4)
- logger.write(' %s' % str(e), 3)
- return 0
-
- if memlimits:
- self.memlimits_args = (time.sleep, (self.MemoryTimerResolution,),
- self.check_memory, (min(memlimits),))
- self.alarms.append(_psyco.alarm(*self.memlimits_args))
- if timelimits:
- self.alarms.append(_psyco.alarm(time.sleep, (min(timelimits),),
- self.time_out))
- return 1
-
- def stop(self):
- for alarm in self.alarms:
- alarm.stop(0)
- for alarm in self.alarms:
- alarm.stop(1) # wait for parallel threads to stop
- del self.alarms[:]
- if self.time is not None:
- self.time -= now() - self.time_at_start
- if self.memory is not None:
- self.memory -= _psyco.memory() - self.memory_at_start
-
- try:
- self.do_stop()
- except error:
- return 0
- return 1
-
- def check_memory(self, limit):
- if _psyco.memory() < limit:
- return self.memlimits_args
- go()
-
- def time_out(self):
- self.time = 0.0
- go()
-
- def limitreached(self, limitname):
- try:
- profilers.remove(self)
- except ValueError:
- pass
- if logger:
- logger.write('%s: disabled (%s limit reached)' % (
- self.__class__.__name__, limitname), 4)
- return 0
-
-
-class FullCompiler(Profiler):
-
- def do_start(self):
- _psyco.profiling('f')
-
- def do_stop(self):
- _psyco.profiling('.')
-
-
-class RunOnly(Profiler):
-
- def do_start(self):
- _psyco.profiling('n')
-
- def do_stop(self):
- _psyco.profiling('.')
-
-
-class ChargeProfiler(Profiler):
-
- def __init__(self, watermark, parentframe):
- self.watermark = watermark
- self.parent2 = parentframe * 2.0
- self.lock = thread.allocate_lock()
-
- def init_charges(self):
- _psyco.statwrite(watermark = self.watermark,
- parent2 = self.parent2)
-
- def do_stop(self):
- _psyco.profiling('.')
- _psyco.statwrite(callback = None)
-
-
-class ActiveProfiler(ChargeProfiler):
-
- def active_start(self):
- _psyco.profiling('p')
-
- def do_start(self):
- self.init_charges()
- self.active_start()
- _psyco.statwrite(callback = self.charge_callback)
-
- def charge_callback(self, frame, charge):
- tag(frame.f_code, frame.f_globals)
-
-
-class PassiveProfiler(ChargeProfiler):
-
- initial_charge_unit = _psyco.statread('unit')
- reset_stats_after = 120 # half-lives (maximum 200!)
- reset_limit = initial_charge_unit * (2.0 ** reset_stats_after)
-
- def __init__(self, watermark, halflife, pollfreq, parentframe):
- ChargeProfiler.__init__(self, watermark, parentframe)
- self.pollfreq = pollfreq
- # self.progress is slightly more than 1.0, and computed so that
- # do_profile() will double the charge_unit every 'halflife' seconds.
- self.progress = 2.0 ** (1.0 / (halflife * pollfreq))
-
- def reset(self):
- _psyco.statwrite(unit = self.initial_charge_unit, callback = None)
- _psyco.statreset()
- if logger:
- logger.write("%s: resetting stats" % self.__class__.__name__, 1)
-
- def passive_start(self):
- self.passivealarm_args = (time.sleep, (1.0 / self.pollfreq,),
- self.do_profile)
- self.alarms.append(_psyco.alarm(*self.passivealarm_args))
-
- def do_start(self):
- tag2bind()
- self.init_charges()
- self.passive_start()
-
- def do_profile(self):
- _psyco.statcollect()
- if logger:
- logger.dumpcharges()
- nunit = _psyco.statread('unit') * self.progress
- if nunit > self.reset_limit:
- self.reset()
- else:
- _psyco.statwrite(unit = nunit, callback = self.charge_callback)
- return self.passivealarm_args
-
- def charge_callback(self, frame, charge):
- trytobind(frame.f_code, frame.f_globals)
-
-
-class ActivePassiveProfiler(PassiveProfiler, ActiveProfiler):
-
- def do_start(self):
- self.init_charges()
- self.active_start()
- self.passive_start()
-
- def charge_callback(self, frame, charge):
- tag(frame.f_code, frame.f_globals)
-
-
-
-#
-# we register our own version of sys.settrace(), sys.setprofile()
-# and thread.start_new_thread().
-#
-
-def psyco_settrace(*args, **kw):
- "This is the Psyco-aware version of sys.settrace()."
- result = original_settrace(*args, **kw)
- go()
- return result
-
-def psyco_setprofile(*args, **kw):
- "This is the Psyco-aware version of sys.setprofile()."
- result = original_setprofile(*args, **kw)
- go()
- return result
-
-def psyco_thread_stub(callable, args, kw):
- _psyco.statcollect()
- if kw is None:
- return callable(*args)
- else:
- return callable(*args, **kw)
-
-def psyco_start_new_thread(callable, args, kw=None):
- "This is the Psyco-aware version of thread.start_new_thread()."
- return original_start_new_thread(psyco_thread_stub, (callable, args, kw))
-
-original_settrace = sys.settrace
-original_setprofile = sys.setprofile
-original_start_new_thread = thread.start_new_thread
-sys.settrace = psyco_settrace
-sys.setprofile = psyco_setprofile
-if PYTHON_SUPPORT:
- thread.start_new_thread = psyco_start_new_thread
- # hack to patch threading._start_new_thread if the module is
- # already loaded
- if (sys.modules.has_key('threading') and
- hasattr(sys.modules['threading'], '_start_new_thread')):
- sys.modules['threading']._start_new_thread = psyco_start_new_thread
diff --git a/scripts/training/cmert-0.5/python/psyco/support.py b/scripts/training/cmert-0.5/python/psyco/support.py
deleted file mode 100644
index 596ca7f59..000000000
--- a/scripts/training/cmert-0.5/python/psyco/support.py
+++ /dev/null
@@ -1,196 +0,0 @@
-###########################################################################
-#
-# Psyco general support module.
-# Copyright (C) 2001-2002 Armin Rigo et.al.
-
-"""Psyco general support module.
-
-For internal use.
-"""
-###########################################################################
-
-import sys, _psyco, __builtin__
-
-error = _psyco.error
-class warning(Warning):
- pass
-
-_psyco.NoLocalsWarning = warning
-
-def warn(msg):
- from warnings import warn
- warn(msg, warning, stacklevel=2)
-
-#
-# Version checks
-#
-__version__ = 0x010500f0
-if _psyco.PSYVER != __version__:
- raise error, "version mismatch between Psyco parts, reinstall it"
-
-version_info = (__version__ >> 24,
- (__version__ >> 16) & 0xff,
- (__version__ >> 8) & 0xff,
- {0xa0: 'alpha',
- 0xb0: 'beta',
- 0xc0: 'candidate',
- 0xf0: 'final'}[__version__ & 0xf0],
- __version__ & 0xf)
-
-
-VERSION_LIMITS = [0x02010000, # 2.1
- 0x02020000, # 2.2
- 0x02020200, # 2.2.2
- 0x02030000, # 2.3
- 0x02040000] # 2.4
-
-if ([v for v in VERSION_LIMITS if v <= sys.hexversion] !=
- [v for v in VERSION_LIMITS if v <= _psyco.PYVER ]):
- if sys.hexversion < VERSION_LIMITS[0]:
- warn("Psyco requires Python version 2.1 or later")
- else:
- warn("Psyco version does not match Python version. "
- "Psyco must be updated or recompiled")
-
-PYTHON_SUPPORT = hasattr(_psyco, 'turbo_code')
-
-
-if hasattr(_psyco, 'ALL_CHECKS') and hasattr(_psyco, 'VERBOSE_LEVEL'):
- print >> sys.stderr, ('psyco: running in debugging mode on %s' %
- _psyco.PROCESSOR)
-
-
-###########################################################################
-# sys._getframe() gives strange results on a mixed Psyco- and Python-style
-# stack frame. Psyco provides a replacement that partially emulates Python
-# frames from Psyco frames. The new sys._getframe() may return objects of
-# a custom "Psyco frame" type, which with Python >=2.2 is a subtype of the
-# normal frame type.
-#
-# The same problems require some other built-in functions to be replaced
-# as well. Note that the local variables are not available in any
-# dictionary with Psyco.
-
-
-class Frame:
- pass
-
-
-class PythonFrame(Frame):
-
- def __init__(self, frame):
- self.__dict__.update({
- '_frame': frame,
- })
-
- def __getattr__(self, attr):
- if attr == 'f_back':
- try:
- result = embedframe(_psyco.getframe(self._frame))
- except ValueError:
- result = None
- except error:
- warn("f_back is skipping dead Psyco frames")
- result = self._frame.f_back
- self.__dict__['f_back'] = result
- return result
- else:
- return getattr(self._frame, attr)
-
- def __setattr__(self, attr, value):
- setattr(self._frame, attr, value)
-
- def __delattr__(self, attr):
- delattr(self._frame, attr)
-
-
-class PsycoFrame(Frame):
-
- def __init__(self, tag):
- self.__dict__.update({
- '_tag' : tag,
- 'f_code' : tag[0],
- 'f_globals': tag[1],
- })
-
- def __getattr__(self, attr):
- if attr == 'f_back':
- try:
- result = embedframe(_psyco.getframe(self._tag))
- except ValueError:
- result = None
- elif attr == 'f_lineno':
- result = self.f_code.co_firstlineno # better than nothing
- elif attr == 'f_builtins':
- result = self.f_globals['__builtins__']
- elif attr == 'f_restricted':
- result = self.f_builtins is not __builtins__
- elif attr == 'f_locals':
- raise AttributeError, ("local variables of functions run by Psyco "
- "cannot be accessed in any way, sorry")
- else:
- raise AttributeError, ("emulated Psyco frames have "
- "no '%s' attribute" % attr)
- self.__dict__[attr] = result
- return result
-
- def __setattr__(self, attr, value):
- raise AttributeError, "Psyco frame objects are read-only"
-
- def __delattr__(self, attr):
- if attr == 'f_trace':
- # for bdb which relies on CPython frames exhibiting a slightly
- # buggy behavior: you can 'del f.f_trace' as often as you like
- # even without having set it previously.
- return
- raise AttributeError, "Psyco frame objects are read-only"
-
-
-def embedframe(result):
- if type(result) is type(()):
- return PsycoFrame(result)
- else:
- return PythonFrame(result)
-
-def _getframe(depth=0):
- """Return a frame object from the call stack. This is a replacement for
-sys._getframe() which is aware of Psyco frames.
-
-The returned objects are instances of either PythonFrame or PsycoFrame
-instead of being real Python-level frame objects, so that they can emulate
-the common attributes of frame objects.
-
-The original sys._getframe(), which ignores Psyco frames altogether, is stored in
-psyco._getrealframe(). See also psyco._getemulframe()."""
- # 'depth+1' to account for this _getframe() Python function
- return embedframe(_psyco.getframe(depth+1))
-
-def _getemulframe(depth=0):
- """As _getframe(), but the returned objects are real Python frame objects
-emulating Psyco frames. Some of their attributes can be wrong or missing,
-however."""
- # 'depth+1' to account for this _getemulframe() Python function
- return _psyco.getframe(depth+1, 1)
-
-def patch(name, module=__builtin__):
- f = getattr(_psyco, name)
- org = getattr(module, name)
- if org is not f:
- setattr(module, name, f)
- setattr(_psyco, 'original_' + name, org)
-
-_getrealframe = sys._getframe
-sys._getframe = _getframe
-patch('globals')
-patch('eval')
-patch('execfile')
-patch('locals')
-patch('vars')
-patch('dir')
-patch('input')
-_psyco.original_raw_input = raw_input
-__builtin__.__in_psyco__ = 0==1 # False
-
-if hasattr(_psyco, 'compact'):
- import kdictproxy
- _psyco.compactdictproxy = kdictproxy.compactdictproxy
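The deleted support module above packs Psyco's version into a single hex word and unpacks it into version_info. A minimal Python sketch of that decomposition, using only the constants visible in the file (the helper name decode_version is made up for illustration):

LEVELS = {0xa0: 'alpha', 0xb0: 'beta', 0xc0: 'candidate', 0xf0: 'final'}

def decode_version(word):
    # mirrors the version_info tuple built in the module above
    return (word >> 24,             # major
            (word >> 16) & 0xff,    # minor
            (word >> 8) & 0xff,     # micro
            LEVELS[word & 0xf0],    # release level
            word & 0xf)             # serial

print(decode_version(0x010500f0))   # (1, 5, 0, 'final', 0)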
diff --git a/scripts/training/cmert-0.5/run-cmert b/scripts/training/cmert-0.5/run-cmert
deleted file mode 100755
index 52ebc1723..000000000
--- a/scripts/training/cmert-0.5/run-cmert
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-unset LANG
-export PATH=$PATH:/group/project/statmt/pkoehn/user/abhishek:/group/project/statmt/pkoehn/user/abhishek/cmert-0.5
-export EVAL=/group/project/statmt/pkoehn/user/abhishek/WST05/fr-en-train/dev
-
-mert-driver cmert-work $EVAL/low.test400.fr.rest $EVAL/low.test400.en 100 pharaoh.2005-07-21 "-config /group/project/statmt/pkoehn/user/abhishek/WST05/fr-en-train/model/pharaoh.ini -dl 4 -b 0.1 -ttable-limit 100" "0.2,0-1;0.2,0.2-0.2;0.2,0-1;0.2,0-1;0.2,0-1;0.2,0-1;0.2,-1-1;0.2,-1-1"
-
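The last argument passed to mert-driver above is a semicolon-separated list of weight triples. A small Python sketch of parsing such a "start,min-max" list, assuming the same triple format that mert-moses.pl accepts further below (mert-driver's exact interface is not shown in this diff):

import re

TRIPLE = re.compile(r'^(-?[\d.]+),(-?[\d.]+)-(-?[\d.]+)$')

def parse_ranges(spec):
    # hypothetical helper: turn "start,min-max;start,min-max;..." into triples
    triples = []
    for field in spec.split(';'):
        match = TRIPLE.match(field)
        if not match:
            raise ValueError('bad weight triple: %r' % field)
        triples.append(tuple(float(x) for x in match.groups()))
    return triples

print(parse_ranges('0.2,0-1;0.2,0.2-0.2;0.2,-1-1'))
# [(0.2, 0.0, 1.0), (0.2, 0.2, 0.2), (0.2, -1.0, 1.0)]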
diff --git a/scripts/training/cmert-0.5/score-nbest.py b/scripts/training/cmert-0.5/score-nbest.py
deleted file mode 100755
index c89c994a8..000000000
--- a/scripts/training/cmert-0.5/score-nbest.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/python2.3
-
-"""Convert n-best list in mert.perl format to format required by
-Venugopal's MER trainer. This entails calculating the BLEU component scores."""
-
-"""usage: score-nbest.py <reffile>+ <outprefix>
-
- The input should be sorted by sentence number and piped into stdin
- Run it like this: sort -mnk 1,1 *.nbest | score-nbest.py ...
-"""
-
-import sys, itertools, re
-import bleu
-#The default python version on DICE is currently 2.3, which does not provide set as a built-in type.
-#Comment out this line when moving to python 2.4
-from sets import Set as set
-
-def process(sentnum, testsents):
- candsfile.write("%d %d\n" % (cur_sentnum, len(testsents)))
- for (sent,vector) in testsents:
- comps = bleu.cook_test(sent, cookedrefs[sentnum])
- if comps['testlen'] != comps['guess'][0]:
- sys.stderr.write("ERROR: test length != guessed 1-grams\n")
- featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]),
- " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]),
- comps['reflen']))
-
-
-if __name__ == "__main__":
- import psyco
- psyco.full()
-
- import getopt
- (opts,args) = getopt.getopt(sys.argv[1:], "casn", [])
-
- for (opt,parm) in opts:
- if opt == "-c":
- bleu.preserve_case = True
- if opt == "-a":
- bleu.eff_ref_len = "average"
- if opt == "-s":
- bleu.eff_ref_len = "shortest"
- if opt == "-n":
- bleu.nonorm = 1
-
- print args
- cookedrefs = []
- reffiles = [file(name) for name in args[:-1]]
- print reffiles
- for refs in itertools.izip(*reffiles):
- cookedrefs.append(bleu.cook_refs(refs))
-
- outprefix = args[-1]
-
- featsfile = file(outprefix+"feats.opt", "w")
- candsfile = file(outprefix+"cands.opt", "w")
-
- cur_sentnum = None
- testsents = set()
- progress = 0
-
- infile = sys.stdin
-
- # function that recognizes floats
- re_float=re.compile(r'^-?[-0-9.e]+$')
- is_float=lambda(x):re_float.match(x)
-
- for line in infile:
- try:
- ##Changed to add a further field - AA 29/11/05
- #(sentnum, sent, vector) = line.split('|||')
- (sentnum, sent, vector, prob ) = line.split('|||')
- except:
- sys.stderr.write("ERROR: bad input line %s\n" % line)
- sentnum = int(sentnum)
- sent = " ".join(sent.split())
- # filter out score labels (keep only floats) and convert numbers to floats
- vector = tuple(map(lambda(s): -float(s), filter(is_float, vector.split())))
-
- if sentnum != cur_sentnum:
- if cur_sentnum is not None:
- process(cur_sentnum, testsents)
- cur_sentnum = sentnum
- testsents = set()
- testsents.add((sent,vector))
-
- if progress % 10000 == 0:
- sys.stdout.write(".")
- sys.stdout.flush()
-
- progress += 1
- process(cur_sentnum, testsents)
-
- sys.stdout.write("\n")
- featsfile.close()
- candsfile.close()
-
-
-
-
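Each input line consumed by score-nbest.py above has the form "sentence-number ||| hypothesis ||| labelled scores ||| total". A minimal Python 3 sketch of that per-line parsing; the sample line and its score labels are invented for illustration, and the trailing-colon label test stands in for the original float regex:

line = '7 ||| das ist ein test ||| d: -2.0 lm: -14.3 tm: -5.1 -6.2 w: -4 ||| -23.6'

sentnum, sent, scores, prob = (field.strip() for field in line.split('|||'))
sentnum = int(sentnum)
sent = ' '.join(sent.split())
# keep only the numeric tokens and negate them, as the original script does
vector = tuple(-float(tok) for tok in scores.split() if not tok.endswith(':'))
print(sentnum, vector)   # 7 (2.0, 14.3, 5.1, 6.2, 4.0)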
diff --git a/scripts/training/cmert-0.5/score.c b/scripts/training/cmert-0.5/score.c
deleted file mode 100755
index 5b939a3c8..000000000
--- a/scripts/training/cmert-0.5/score.c
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-
-#include "score.h"
-
-int comps_n = 9;
-
-void comps_addto(int *comps1, int *comps2) {
- int i;
- for (i=0; i<comps_n; i++)
- comps1[i] += comps2[i];
-}
-
-float compute_score(int *comps) {
- float logbleu = 0.0, brevity;
- int i;
- int n = (comps_n-1)/2;
-
- /*for (i=0; i<comps_n; i++)
- fprintf(stderr, " %d", comps[i]);
- fprintf(stderr, "\n");*/
-
- for (i=0; i<n; i++) {
- if (comps[2*i] == 0)
- return 0.0;
- logbleu += log(comps[2*i])-log(comps[2*i+1]);
- }
- logbleu /= n;
- brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
- if (brevity < 0.0)
- logbleu += brevity;
- return exp(logbleu);
-}
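compute_score() above turns the accumulated counts into BLEU: the geometric mean of the four n-gram precisions, with a brevity penalty applied when the test output is shorter than the reference. A small Python sketch of the same computation, assuming the nine-element layout [correct_1, guess_1, ..., correct_4, guess_4, reference_length]; the example counts are invented:

import math

def compute_score(comps):
    # comps: alternating correct/guess n-gram counts for n = 1..4, then reflen
    n = (len(comps) - 1) // 2
    logbleu = 0.0
    for i in range(n):
        if comps[2 * i] == 0:
            return 0.0
        logbleu += math.log(comps[2 * i]) - math.log(comps[2 * i + 1])
    logbleu /= n
    brevity = 1.0 - comps[-1] / comps[1]   # 1 - reflen / test length
    if brevity < 0.0:
        logbleu += brevity
    return math.exp(logbleu)

print(compute_score([18, 20, 12, 19, 8, 18, 5, 17, 22]))   # invented counts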
diff --git a/scripts/training/cmert-0.5/score.h b/scripts/training/cmert-0.5/score.h
deleted file mode 100755
index 5202645f6..000000000
--- a/scripts/training/cmert-0.5/score.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef SCORE_H
-#define SCORE_H
-
-extern int comps_n;
-
-void comps_addto(int *comps1, int *comps2);
-float compute_score(int *comps);
-
-#endif
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
deleted file mode 100755
index 0e65b1640..000000000
--- a/scripts/training/filter-model-given-input.pl
+++ /dev/null
@@ -1,206 +0,0 @@
-#!/usr/bin/perl -w
-# Given a moses.ini file and an input text prepare minimized translation
-# tables and a new moses.ini, so that loading of tables is much faster.
-
-# original code by Philipp Koehn
-# changes by Ondrej Bojar
-
-use strict;
-
-my $MAX_LENGTH = 10;
-# consider phrases in input up to this length
-# in other words, all phrase tables will effectively be limited to phrases of
-# at most 10 words
-
-my $dir = shift;
-my $config = shift;
-my $input = shift;
-
-if (!defined $dir || !defined $config || !defined $input) {
- print STDERR "usage: filter-model-given-input.pl targetdir moses.ini input.text\n";
- exit 1;
-}
-
-$dir = ensure_full_path($dir);
-
-# buggy directory in place?
-if (-d $dir && ! -e "$dir/info") {
- print STDERR "The directory $dir exists but does not belong to me. Delete $dir!\n";
- exit(1);
-}
-
-# already filtered? check if it can be re-used
-if (-d $dir) {
- my @INFO = `cat $dir/info`;
- chop(@INFO);
- if($INFO[0] ne $config
- || ($INFO[1] ne $input &&
- $INFO[1].".tagged" ne $input)) {
- print STDERR "WARNING: directory exists but does not match parameters:\n";
- print STDERR " ($INFO[0] ne $config || $INFO[1] ne $input)\n";
- exit 1;
- }
- print STDERR "The filtered model was ready in $dir, not doing anything.\n";
- exit 0;
-}
-
-
-# filter the translation and distortion tables
-safesystem("mkdir -p $dir") or die "Can't mkdir $dir";
-
-# get tables to be filtered (and modify config file)
-my (@TABLE,@TABLE_FACTORS,@TABLE_NEW_NAME,%CONSIDER_FACTORS);
-open(INI_OUT,">$dir/moses.ini") or die "Can't write $dir/moses.ini";
-open(INI,$config) or die "Can't read $config";
-while(<INI>) {
- print INI_OUT $_;
- if (/ttable-file\]/) {
- while(1) {
- my $table_spec = <INI>;
- if ($table_spec !~ /^([\d\-]+) ([\d\-]+) (\d+) (\S+)$/) {
- print INI_OUT $table_spec;
- last;
- }
- my ($source_factor,$t,$w,$file) = ($1,$2,$3,$4);
-
- chomp($file);
- push @TABLE, $file;
-
- my $new_name = "$dir/phrase-table.$source_factor-$t";
- print INI_OUT "$source_factor $t $w $new_name\n";
- push @TABLE_NEW_NAME,$new_name;
-
- $CONSIDER_FACTORS{$source_factor} = 1;
- print STDERR "Considering factor $source_factor\n";
- push @TABLE_FACTORS, $source_factor;
- }
- }
- elsif (/distortion-file/) {
- while(1) {
- my $table_spec = <INI>;
- if ($table_spec !~ /^([\d\-]+) ([\d\-]+) (\d+) (\S+)$/) {
- print INI_OUT $table_spec;
- last;
- }
- my ($source_factor,$t,$w,$file) = ($1,$2,$3,$4);
-
- chomp($file);
- push @TABLE,$file;
-
- $file =~ s/^.*\/+([^\/]+)/$1/g;
- my $new_name = "$dir/$file";
- print INI_OUT "$source_factor $t $w $new_name\n";
- push @TABLE_NEW_NAME,$new_name;
-
- $CONSIDER_FACTORS{$source_factor} = 1;
- print STDERR "Considering factor $source_factor\n";
- push @TABLE_FACTORS,$source_factor;
- }
- }
-}
-close(INI);
-close(INI_OUT);
-
-
-# get the phrase pairs appearing in the input text, up to the $MAX_LENGTH
-my %PHRASE_USED;
-open(INPUT,$input) or die "Can't read $input";
-while(my $line = <INPUT>) {
- chomp($line);
- my @WORD = split(/ +/,$line);
- for(my $i=0;$i<=$#WORD;$i++) {
- for(my $j=0;$j<$MAX_LENGTH && $j+$i<=$#WORD;$j++) {
- foreach (keys %CONSIDER_FACTORS) {
- my @FACTOR = split(/,/);
- my $phrase = "";
- for(my $k=$i;$k<=$i+$j;$k++) {
- my @WORD_FACTOR = split(/\|/,$WORD[$k]);
- for(my $f=0;$f<=$#FACTOR;$f++) {
- $phrase .= $WORD_FACTOR[$FACTOR[$f]]."|";
- }
- chop($phrase);
- $phrase .= " ";
- }
- chop($phrase);
- $PHRASE_USED{$_}{$phrase}++;
- }
- }
- }
-}
-close(INPUT);
-
-# filter files
-for(my $i=0;$i<=$#TABLE;$i++) {
- my ($used,$total) = (0,0);
- my $file = $TABLE[$i];
- my $factors = $TABLE_FACTORS[$i];
- my $new_file = $TABLE_NEW_NAME[$i];
- print STDERR "filtering $file -> $new_file...\n";
-
- my $openstring;
- if ($file =~ /\.gz$/) {
- $openstring = "zcat $file |";
- } else {
- $openstring = "< $file";
- }
-
- open(FILE,$openstring) or die "Can't open '$openstring'";
- open(FILE_OUT,">$new_file") or die "Can't write $new_file";
-
- while(my $entry = <FILE>) {
- my ($foreign,$rest) = split(/ \|\|\| /,$entry,2);
- $foreign =~ s/ $//;
- if (defined($PHRASE_USED{$factors}{$foreign})) {
- print FILE_OUT $entry;
- $used++;
- }
- $total++;
- }
- close(FILE);
- close(FILE_OUT);
- die "No phrases found in $file!" if $total == 0;
-  printf STDERR "$used of $total phrase pairs used (%.2f%s) - note: max length $MAX_LENGTH\n",(100*$used/$total),'%';
-}
-
-open(INFO,">$dir/info");
-print INFO "$config\n$input\n";
-close(INFO);
-
-
-print "To run the decoder, please call:
- moses -f $dir/moses.ini < $input\n";
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-sub ensure_full_path {
- my $PATH = shift;
- return $PATH if $PATH =~ /^\//;
- $PATH = `pwd`."/".$PATH;
- $PATH =~ s/[\r\n]//g;
- $PATH =~ s/\/\.\//\//g;
- $PATH =~ s/\/+/\//g;
- my $sanity = 0;
- while($PATH =~ /\/\.\.\// && $sanity++<10) {
- $PATH =~ s/\/+/\//g;
- $PATH =~ s/\/[^\/]+\/\.\.\//\//g;
- }
- $PATH =~ s/\/[^\/]+\/\.\.$//;
- $PATH =~ s/\/+$//;
- return $PATH;
-}
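The filtering script above keeps only phrase-table entries whose source side occurs in the input, so it first enumerates every factored sub-phrase of the input up to $MAX_LENGTH tokens. A small Python sketch of that enumeration step; the sample factored input line is invented:

MAX_LENGTH = 10   # same limit as the Perl script

def used_phrases(line, factors=(0,)):
    # collect every sub-phrase of up to MAX_LENGTH factored tokens,
    # projected onto the requested factor indices
    words = line.split()
    phrases = set()
    for i in range(len(words)):
        for j in range(i, min(i + MAX_LENGTH, len(words))):
            tokens = []
            for k in range(i, j + 1):
                parts = words[k].split('|')
                tokens.append('|'.join(parts[f] for f in factors))
            phrases.add(' '.join(tokens))
    return phrases

print(sorted(used_phrases('das|ART ist|VVFIN gut|ADJD', factors=(0,))))
# ['das', 'das ist', 'das ist gut', 'gut', 'ist', 'ist gut']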
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
deleted file mode 100755
index 58f7c4f01..000000000
--- a/scripts/training/mert-moses.pl
+++ /dev/null
@@ -1,956 +0,0 @@
-#!/usr/bin/perl -w
-# Usage:
-# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
-# For other options see below or run 'mert-moses.pl --help'
-
-# Notes:
-# <foreign> and <english> should be raw text files, one sentence per line
-# <english> can be a prefix, in which case the files <english>0, <english>1, etc. are used
-
-# Revision history
-
-# 31 Jul 2006 move gzip run*.out to avoid failure with restarts
-# adding default paths
-# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
-# 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
-# 27 Jul 2006 adding the safesystem() function to handle process failure
-# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
-# 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
-# 18 Jul 2006 adapted for Moses and cleaned up (PK)
-# 21 Jan 2005 unified various versions, thorough cleanup (DWC)
-# now indexing accumulated n-best list solely by feature vectors
-# 14 Dec 2004 reimplemented find_threshold_points in C (NMD)
-# 25 Oct 2004 Use either average or shortest (default) reference
-# length as effective reference length (DWC)
-# 13 Oct 2004 Use alternative decoders (DWC)
-# Original version by Philipp Koehn
-
-# defaults for initial values and ranges are:
-my $default_triples = {
- # for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
- # of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
-  # but the translation model currently has 5 features
- "d" => [ [ 1.0, 0.0, 2.0 ] ],
- "lm" => [ [ 1.0, 0.0, 2.0 ] ],
- "tm" => [
- [ 0.3, 0.0, 0.5 ],
- [ 0.2, 0.0, 0.5 ],
- [ 0.3, 0.0, 0.5 ],
- [ 0.2, 0.0, 0.5 ],
- [ 0.0, -1.0, 1.0 ],
- ],
- "g" => [
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ],
- ],
- "w" => [ [ 0.0, -1.0, 1.0 ] ],
-};
-
-# moses.ini file uses FULL names for lambdas, while this training script internally (and on the command line)
-# uses ABBR names.
-my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation";
-my %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
-my %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
-
-# We parse moses.ini to figure out how many weights we need to optimize.
-# For this, we must know the correspondence between options defining files
-# for models and options assigning weights to these models.
-my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d generation-file=g";
-my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP;
-
-# There are weights that do not correspond to any input file; they just increase the total number of lambdas we optimize
-my $extra_lambdas_for_model = {
- "w" => 1, # word penalty
- "d" => 1, # basic distortion
-};
-
-
-
-
-my $minimum_required_change_in_weights = 0.00001;
- # stop if no lambda changes more than this
-
-my $verbose = 0;
-my $usage = 0; # request for --help
-my $___WORKING_DIR = "mert-work";
-my $___DEV_F = undef; # required, input text to decode
-my $___DEV_E = undef; # required, basename of files with references
-my $___DECODER = undef; # required, pathname to the decoder executable
-my $___CONFIG = undef; # required, pathname to startup ini file
-my $___N_BEST_LIST_SIZE = 100;
-my $queue_flags = "-l ws06ossmt=true -l mem_free=0.5G -hard"; # extra parameters for parallelizer
-      # the -l ws06ossmt is relevant only to the JHU workshop
-my $___JOBS = undef; # if parallel, number of jobs to use (undef -> serial)
-my $___DECODER_FLAGS = ""; # additional parameters to pass to the decoder
-my $___LAMBDA = undef; # string specifying the seed weights and boundaries of all lambdas
-my $continue = 0; # should we try to continue from the last saved step?
-my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
-
-# Parameter for effective reference length when computing BLEU score
-# This is used by score-nbest-bleu.py
-# Default is to use shortest reference
-# Use "--average" to use average reference length
-my $___AVERAGE = 0;
-
-my $allow_unknown_lambdas = 0;
-my $allow_skipping_lambdas = 0;
-
-
-my $SCRIPTS_ROOTDIR = undef; # path to all tools (overridden by specific options)
-my $cmertdir = undef; # path to cmert directory
-my $pythonpath = undef; # path to python libraries needed by cmert
-my $filtercmd = undef; # path to filter-model-given-input.pl
-my $SCORENBESTCMD = undef;
-my $qsubwrapper = undef;
-my $moses_parallel_cmd = undef;
-
-
-use strict;
-use Getopt::Long;
-GetOptions(
- "working-dir=s" => \$___WORKING_DIR,
- "input=s" => \$___DEV_F,
- "refs=s" => \$___DEV_E,
- "decoder=s" => \$___DECODER,
- "config=s" => \$___CONFIG,
- "nbest=i" => \$___N_BEST_LIST_SIZE,
- "queue-flags=s" => \$queue_flags,
- "jobs=i" => \$___JOBS,
- "decoder-flags=s" => \$___DECODER_FLAGS,
- "lambdas=s" => \$___LAMBDA,
- "continue" => \$continue,
- "skip-decoder" => \$skip_decoder,
- "average" => \$___AVERAGE,
- "help" => \$usage,
- "allow-unknown-lambdas" => \$allow_unknown_lambdas,
- "allow-skipping-lambdas" => \$allow_skipping_lambdas,
- "verbose" => \$verbose,
-  "rootdir=s" => \$SCRIPTS_ROOTDIR,
-  "cmertdir=s" => \$cmertdir,
-  "pythonpath=s" => \$pythonpath,
-  "filtercmd=s" => \$filtercmd, # allows overriding the default location
-  "scorenbestcmd=s" => \$SCORENBESTCMD, # path to score-nbest.py
-  "qsubwrapper=s" => \$qsubwrapper, # allows overriding the default location
-  "mosesparallelcmd=s" => \$moses_parallel_cmd, # allows overriding the default location
-) or exit(1);
-
-# the 4 required parameters can be supplied on the command line directly
-# or using the corresponding --input, --refs, --decoder and --config options
-if (scalar @ARGV == 4) {
-  # required parameters: input_file references_basename decoder_executable decoder_ini
- $___DEV_F = shift;
- $___DEV_E = shift;
- $___DECODER = shift;
- $___CONFIG = shift;
-}
-
-
-print STDERR "After default: $queue_flags\n";
-
-if ($usage || !defined $___DEV_F || !defined$___DEV_E || !defined$___DECODER || !defined $___CONFIG) {
- print STDERR "usage: mert-moses.pl input-text references decoder-executable decoder.ini
-Options:
- --working-dir=mert-dir ... where all the files are created
-  --nbest=100 ... size of the n-best list to generate
-  --jobs=N  ... set this to the number of jobs to run moses in parallel
- --mosesparallelcmd=STRING ... use a different script instead of moses-parallel
-  --queue-flags=STRING  ... anything you wish to pass to
-              qsub, e.g. '-l ws06ossmt=true'
- The default is to submit the jobs to the ws06ossmt queue, which
- makes sense only at JHU. To reset the default JHU queue
- parameters, please use \"--queue-flags=' '\" (i.e. a space between
- the quotes).
- --decoder-flags=STRING ... extra parameters for the decoder
- --lambdas=STRING ... default values and ranges for lambdas, a complex string
- such as 'd:1,0.5-1.5 lm:1,0.5-1.5 tm:0.3,0.25-0.75;0.2,0.25-0.75;0.2,0.25-0.75;0.3,0.25-0.75;0,-0.5-0.5 w:0,-0.5-0.5'
- --allow-unknown-lambdas ... keep going even if someone supplies a new lambda
- in the lambdas option (such as 'superbmodel:1,0-1'); optimize it, too
- --continue ... continue from the last achieved state
- --skip-decoder ... skip the decoder run for the first time, assuming that
- we got interrupted during optimization
- --average ... Use either average or shortest (default) reference
- length as effective reference length
- --filtercmd=STRING ... path to filter-model-given-input.pl
-  --rootdir=STRING  ... where the helper scripts reside (if not given explicitly)
-  --cmertdir=STRING ... where cmert is installed
-  --pythonpath=STRING  ... where the python libraries needed by cmert reside
- --scorenbestcmd=STRING ... path to score-nbest.py
-";
- exit 1;
-}
-
-# Check validity of input parameters and set defaults if needed
-
-
-
-
-if (!defined $SCRIPTS_ROOTDIR) {
- $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"};
- die "Please set SCRIPTS_ROOTDIR or specify --rootdir" if !defined $SCRIPTS_ROOTDIR;
-}
-
-print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
-
-
-
-
-# path of the script for filtering phrase tables given the input text
-$filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;
-
-$qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
-
-$moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
- if !defined $moses_parallel_cmd;
-
-$cmertdir = "$SCRIPTS_ROOTDIR/training/cmert-0.5" if !defined $cmertdir;
-my $cmertcmd="$cmertdir/mert";
-
-$SCORENBESTCMD = "$cmertdir/score-nbest.py" if ! defined $SCORENBESTCMD;
-
-$pythonpath = "$cmertdir/python" if !defined $pythonpath;
-
-$ENV{PYTHONPATH} = $pythonpath; # other scripts need to know
-
-
-die "Not executable: $filtercmd" if ! -x $filtercmd;
-die "Not executable: $cmertcmd" if ! -x $cmertcmd;
-die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
-die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
-die "Not a dir: $pythonpath" if ! -d $pythonpath;
-die "Not executable: $___DECODER" if ! -x $___DECODER;
-
-my $input_abs = ensure_full_path($___DEV_F);
-die "File not found: $___DEV_F (interpreted as $input_abs)."
- if ! -e $input_abs;
-$___DEV_F = $input_abs;
-
-
-my $decoder_abs = ensure_full_path($___DECODER);
-die "File not found: $___DECODER (interpreted as $decoder_abs)."
- if ! -x $decoder_abs;
-$___DECODER = $decoder_abs;
-
-
-my $ref_abs = ensure_full_path($___DEV_E);
-# check if English dev set (reference translations) exist and store a list of all references
-my @references;
-if (-e $ref_abs) {
- push @references, $ref_abs;
-}
-else {
- # if multiple file, get a full list of the files
- my $part = 0;
- while (-e $ref_abs.$part) {
- push @references, $ref_abs.$part;
- $part++;
- }
- die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)") unless $part;
-}
-
-my $config_abs = ensure_full_path($___CONFIG);
-die "File not found: $___CONFIG (interpreted as $config_abs)."
- if ! -e $config_abs;
-$___CONFIG = $config_abs;
-
-
-
-# check validity of moses.ini and collect number of models and lambdas per model
-# need to make a copy of $extra_lambdas_for_model, scan_config spoils it
-my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
-my ($lambdas_per_model, $models_used) = scan_config($___CONFIG, \%copy_of_extra_lambdas_for_model);
-
-
-# Parse the lambda config string and convert it to a nice structure in the same format as $default_triples
-my $use_triples = undef;
-if (defined $___LAMBDA) {
- # interpreting lambdas from command line
- foreach (split(/\s+/,$___LAMBDA)) {
- my ($name,$values) = split(/:/);
- die "Malformed setting: '$_', expected name:values\n" if !defined $name || !defined $values;
- foreach my $startminmax (split/;/,$values) {
- if ($startminmax =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/) {
- my $start = $1;
-          my $min = $2;
-          my $max = $3;
- push @{$use_triples->{$name}}, [$start, $min, $max];
- }
- else {
- die "Malformed feature range definition: $name => $startminmax\n";
- }
- }
- }
-} else {
- # no lambdas supplied, use the default ones, but do not forget to repeat them accordingly
-  # first for the inherent models
- foreach my $name (keys %$extra_lambdas_for_model) {
- foreach (1..$extra_lambdas_for_model->{$name}) {
- die "No default weights defined for -$name"
- if !defined $default_triples->{$name};
- # XXX here was a deadly bug: we need a deep copy of the default values
- my @copy = ();
- foreach my $triple (@{$default_triples->{$name}}) {
- my @copy_triple = @$triple;
- push @copy, [ @copy_triple ];
- }
- push @{$use_triples->{$name}}, @copy;
- }
- }
- # and then for all models used
- foreach my $name (keys %$models_used) {
- foreach (1..$models_used->{$name}) {
- die "No default weights defined for -$name"
- if !defined $default_triples->{$name};
- # XXX here was a deadly bug: we need a deep copy of the default values
- my @copy = ();
- foreach my $triple (@{$default_triples->{$name}}) {
- my @copy_triple = @$triple;
- push @copy, [ @copy_triple ];
- }
- push @{$use_triples->{$name}}, @copy;
- }
- }
-}
-
-# moses should use our config
-if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /
-) {
- die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
-}
-
-# walk through all lambdas the user wishes to optimize and check
-# if the number of lambdas matches
-foreach my $name (keys %$use_triples) {
- my $expected_lambdas = $lambdas_per_model->{$name};
- $expected_lambdas = 0 if !defined $expected_lambdas;
- my $got_lambdas = defined $use_triples->{$name} ? scalar @{$use_triples->{$name}} : 0;
- if ($got_lambdas != $expected_lambdas) {
- if ($allow_unknown_lambdas && $expected_lambdas == 0) {
- print STDERR "Allowing to optimize $name, although I have no idea what it is.\n";
- } else {
- print STDERR "Wrong number of lambdas for $name. Expected (given the config file): $expected_lambdas, got: $got_lambdas.
-Use --allow-unknown-lambdas to optimize lambdas that you are just introducing
-and I cannot validate against the models mentioned in moses.ini.\n";
- exit 1;
- }
- }
-}
-
-# as weights are normalized in the next steps (by cmert)
-# normalize initial LAMBDAs, too
-my $need_to_normalize = 1;
-
-
-
-my @order_of_lambdas_from_decoder = ();
-# this will store the labels of scores coming out of the decoder (and hence the order of lambdas coming out of mert)
-# we will use the array to interpret the lambdas
-# the array gets filled with labels only after the first n-best list has been generated
-
-
-
-
-#store current directory and create the working directory (if needed)
-my $cwd = `pwd`; chop($cwd);
-safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";
-
-{
-# open local scope
-
-#chdir to the working directory
-chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
-
-
-
-
-# set start run
-my $start_run = 1;
-
-if ($continue) {
- # need to load last best values
- print STDERR "Trying to continue an interrupted optimization.\n";
- open IN, "finished_step.txt" or die "Failed to find the step number, failed to read finished_step.txt";
- my $step = <IN>;
- chomp $step;
- $step++;
- close IN;
- $start_run = $step +1;
-
- die "Can't start from step $step, because run$step.best$___N_BEST_LIST_SIZE.out.gz was not found!"
- if ! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz";
-
- print STDERR "Reading last cached lambda values (result from step $step)\n";
- @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");
-
- open IN, "weights.txt" or die "Can't read weights.txt";
- my $newweights = <IN>;
- chomp $newweights;
- close IN;
- my @newweights = split /\s+/, $newweights;
-
- # dump_triples($use_triples);
- $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);
- # dump_triples($use_triples);
-}
-
-
-
-# filter the phrase tables, use --decoder-flags
-print "filtering the phrase tables... ".`date`;
-my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
-if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -queue-parameter=\"$queue_flags\"" ) or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
-} else {
- safesystem($cmd) or die "Failed to filter the tables.";
-}
-
-
-# the decoder should now use the filtered model
-my $PARAMETERS;
-$PARAMETERS = $___DECODER_FLAGS . " -config filtered/moses.ini";
-
-my $devbleu = undef;
-my $bestpoint = undef;
-my $run=$start_run-1;
-my $prev_size = -1;
-while(1) {
- $run++;
- # run beamdecoder with option to output nbestlists
- # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists
-
- print "run $run start at ".`date`;
-
- # In case something dies later, we might wish to have a copy
- create_config($___CONFIG, "./run$run.moses.ini", $use_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));
-
-
- # skip if the user wanted
- if (!$skip_decoder) {
- print "($run) run decoder to produce n-best lists\n";
- @order_of_lambdas_from_decoder = run_decoder($use_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);
- $need_to_normalize = 0;
- safesystem("gzip -f run*out") or die "Failed to gzip run*out";
- }
- else {
- print "skipped decoder run\n";
- if (0 == scalar @order_of_lambdas_from_decoder) {
- @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -dc run*.best*.out.gz | head -1 |");
- }
- $skip_decoder = 0;
- $need_to_normalize = 0;
- }
-
- my $EFF_REF_LEN = "";
- if ($___AVERAGE) {
- $EFF_REF_LEN = "-a";
- }
-
-  # To be sure that the scoring script produces these fresh:
- safesystem("rm -f cands.opt feats.opt") or die;
-
- # convert n-best list into a numberized format with error scores
-
- print STDERR "Scoring the nbestlist.\n";
- my $cmd = "export PYTHONPATH=$pythonpath ; gunzip -dc run*.best*.out.gz | sort -n -t \"|\" -k 1,1 | $SCORENBESTCMD $EFF_REF_LEN ".join(" ", @references)." ./";
- if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -queue-parameter=\"$queue_flags\"") or die "Failed to submit scoring nbestlist to queue (via $qsubwrapper)";
- } else {
- safesystem($cmd) or die "Failed to score nbestlist";
- }
-
-
- print STDERR "Hoping that scoring succeeded. We'll see if we can read the output files now.\n";
-
-
-  # keep a count of lines in the n-best lists (altogether)
- # if it did not increase since last iteration, we are DONE
- open(IN,"cands.opt") or die "Can't read cands.opt";
- my $size=0;
- while (<IN>) {
- chomp;
- my @flds = split / /;
- $size += $flds[1];
- }
- close(IN);
- print "$size accumulated translations\n";
- print "prev accumulated translations was : $prev_size\n";
- if ($size <= $prev_size){
- print STDERR "No new hypotheses in nbest list. Stopping.\n";
- last;
- }
- $prev_size = $size;
-
-
- # run cmert
- # cmert reads in the file init.opt containing three lines:
- # minimum values
- # maximum values
- # current values
-  # We need to prepare the files, and the order of the lambdas must
-  # correspond to the order in @order_of_lambdas_from_decoder
-
- my @MIN = (); # lower bounds
- my @MAX = (); # upper bounds
- my @CURR = (); # the starting values
- my @NAME = (); # to which model does the lambda belong
-
- # walk in order of @order_of_lambdas_from_decoder and collect the min,max,val
- my %visited = ();
- foreach my $name (@order_of_lambdas_from_decoder) {
- next if $visited{$name};
- $visited{$name} = 1;
-    die "The decoder also produced some '$name' scores, but we do not know the ranges for them, so there is no way to optimize them\n"
- if !defined $use_triples->{$name};
- foreach my $feature (@{$use_triples->{$name}}) {
- my ($val, $min, $max) = @$feature;
- push @CURR, $val;
- push @MIN, $min;
- push @MAX, $max;
- push @NAME, $name;
- }
- }
-
- open(OUT,"> init.opt") or die "Can't write init.opt (WD now $___WORKING_DIR)";
- print OUT join(" ", @MIN)."\n";
- print OUT join(" ", @MAX)."\n";
- print OUT join(" ", @CURR)."\n";
- close(OUT);
-
-  # record which model each lambda belongs to, for reference
- open(OUT,"> names.txt") or die "Can't write names.txt (WD now $___WORKING_DIR)";
- print OUT join(" ", @NAME)."\n";
- close(OUT);
-
- # make a backup copy labelled with this run number
- safesystem("cp init.opt run$run.init.opt") or die;
-
- my $DIM = scalar(@CURR); # number of lambdas
- $cmd="$cmertcmd -d $DIM";
-
- print STDERR "Starting cmert.\n";
- if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -stderr=cmert.log -queue-parameter=\"$queue_flags\"") or die "Failed to start cmert (via qsubwrapper $qsubwrapper)";
- } else {
- safesystem("$cmd 2> cmert.log") or die "Failed to run cmert";
- }
- die "Optimization failed, file weights.txt does not exist or is empty"
- if ! -s "weights.txt";
- # backup copies
- safesystem ("cp cmert.log run$run.cmert.log") or die;
- safesystem ("cp weights.txt run$run.weights.txt") or die; # this one is needed for restarts, too
- print "run $run end at ".`date`;
-
- $bestpoint = undef;
- $devbleu = undef;
- open(IN,"cmert.log") or die "Can't open cmert.log";
- while (<IN>) {
- if (/Best point:\s*([\s\d\.\-]+?)\s*=> ([\d\.]+)/) {
- $bestpoint = $1;
- $devbleu = $2;
- last;
- }
- }
- close IN;
-  die "Failed to parse cmert.log, could not find the Best point line."
- if !defined $bestpoint || !defined $devbleu;
- print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;
-
- my @newweights = split /\s+/, $bestpoint;
-
- # update my cache of lambda values
- $use_triples = store_new_lambda_values($use_triples, \@order_of_lambdas_from_decoder, \@newweights);
-
- ## additional stopping criterion: weights have not changed
- my $shouldstop = 1;
- for(my $i=0; $i<@CURR; $i++) {
- die "Lost weight! cmert reported fewer weights (@newweights) than we gave it (@CURR)"
- if !defined $newweights[$i];
- if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {
- $shouldstop = 0;
- last;
- }
- }
-
- open F, "> finished_step.txt" or die "Can't mark finished step";
- print F $run."\n";
- close F;
-
-
- if ($shouldstop) {
- print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
- last;
- }
-
-}
-print "Training finished at ".`date`;
-
-safesystem("cp init.opt run$run.init.opt") or die;
-safesystem ("cp cmert.log run$run.cmert.log") or die;
-
-create_config($___CONFIG, "./moses.ini", $use_triples, $run, $devbleu);
-
-# just to be sure that we have the really last finished step marked
-open F, "> finished_step.txt" or die "Can't mark finished step";
-print F $run."\n";
-close F;
-
-
-#chdir back to the original directory # not strictly needed, just a reminder that we changed directory
-chdir($cwd);
-
-} # end of local scope
-
-
-sub store_new_lambda_values {
- # given new lambda values (in given order), replace the 'val' element in our triples
- my $triples = shift;
- my $names = shift;
- my $values = shift;
-
- my %idx = ();
- foreach my $i (0..scalar(@$values)-1) {
- my $name = $names->[$i];
- die "Missed name for lambda $values->[$i] (in @$values; names: @$names)"
- if !defined $name;
- if (!defined $idx{$name}) {
- $idx{$name} = 0;
- } else {
- $idx{$name}++;
- }
- die "We did not optimize '$name', but moses returned it back to us"
- if !defined $triples->{$name};
- die "Moses gave us too many lambdas for '$name', we had ".scalar(@{$triples->{$name}})
-      ." but we got at least ".($idx{$name}+1)
- if !defined $triples->{$name}->[$idx{$name}];
-
- # set the corresponding field in triples
- # print STDERR "Storing $i-th score as $name: $idx{$name}: $values->[$i]\n";
- $triples->{$name}->[$idx{$name}]->[0] = $values->[$i];
- }
- return $triples;
-}
-
-sub dump_triples {
- my $triples = shift;
-
- foreach my $name (keys %$triples) {
- foreach my $triple (@{$triples->{$name}}) {
- my ($val, $min, $max) = @$triple;
- print STDERR "Triples: $name\t$val\t$min\t$max ($triple)\n";
- }
- }
-}
-
-
-sub run_decoder {
- my ($triples, $parameters, $run, $output_order_of_lambdas, $need_to_normalize) = @_;
- my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out";
- my $filename = sprintf($filename_template, $run);
-
- print "params = $parameters\n";
- # prepare the decoder config:
- my $decoder_config = "";
- my @vals = ();
- foreach my $name (keys %$triples) {
- $decoder_config .= "-$name ";
- foreach my $triple (@{$triples->{$name}}) {
- my ($val, $min, $max) = @$triple;
- $decoder_config .= "%.6f ";
- push @vals, $val;
- }
- }
- if ($need_to_normalize) {
- print STDERR "Normalizing lambdas: @vals\n";
- my $totlambda=0;
- grep($totlambda+=abs($_),@vals);
- grep($_/=$totlambda,@vals);
- }
- print STDERR "DECODER_CFG = $decoder_config\n";
- print STDERR " values = @vals\n";
- $decoder_config = sprintf($decoder_config, @vals);
- print "decoder_config = $decoder_config\n";
-
- # run the decoder
- my $decoder_cmd;
- if (defined $___JOBS) {
- $decoder_cmd = "$moses_parallel_cmd -qsub-prefix mert$run -queue-parameters \"$queue_flags\" $parameters $decoder_config -n-best-file $filename -n-best-size $___N_BEST_LIST_SIZE -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
- } else {
- $decoder_cmd = "$___DECODER $parameters $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -i $___DEV_F > run$run.out";
- }
-
- safesystem($decoder_cmd) or die "The decoder died.";
-
- if (0 == scalar @$output_order_of_lambdas) {
- # we have to peek at the nbestlist
- return get_order_of_scores_from_nbestlist($filename);
- } else {
- # we have checked the nbestlist already, we trust the order of output scores does not change
- return @$output_order_of_lambdas;
- }
-}
-
-sub get_order_of_scores_from_nbestlist {
- # read the first line and interpret the ||| label: num num num label2: num ||| column in nbestlist
- # return the score labels in order
- my $fname_or_source = shift;
- print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
- open IN, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source'";
- my $line = <IN>;
- close IN;
- die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
- my ($sent, $hypo, $scores, $total) = split /\|\|\|/, $line;
- $scores =~ s/^\s*|\s*$//g;
- die "No scores in line: $line" if $scores eq "";
-
- my @order = ();
- my $label = undef;
- foreach my $tok (split /\s+/, $scores) {
- if ($tok =~ /^([a-z][0-9a-z]*):/i) {
- $label = $1;
- } elsif ($tok =~ /^-?[-0-9.e]+$/) {
- # a score found, remember it
- die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
- if !defined $label;
- push @order, $label;
- } else {
- die "Not a label, not a score '$tok'. Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'";
- }
- }
- print STDERR "The decoder returns the scores in this order: @order\n";
- return @order;
-}
-
-sub create_config {
- my $infn = shift; # source config
- my $outfn = shift; # where to save the config
- my $triples = shift; # the lambdas we should write
- my $iteration = shift; # just for verbosity
- my $bleu_achieved = shift; # just for verbosity
-
- my %P; # the hash of all parameters we wish to override
-
- # first convert the command line parameters to the hash
- { # ensure local scope of vars
- my $parameter=undef;
- print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
- $___DECODER_FLAGS =~ s/^\s*|\s*$//;
- $___DECODER_FLAGS =~ s/\s+/ /;
- foreach (split(/ /,$___DECODER_FLAGS)) {
- if (/^\-([^\d].*)$/) {
- $parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
- }
- else {
- die "Found value with no -paramname before it: $_"
- if !defined $parameter;
- push @{$P{$parameter}},$_;
- }
- }
- }
-
- # Convert weights to elements in P
- foreach my $abbr (keys %$triples) {
- # First delete all weights params from the input, in short or long-named version
- delete($P{$abbr});
- delete($P{$ABBR2FULL{$abbr}});
- # Then feed P with the current values
- foreach my $feature (@{$use_triples->{$abbr}}) {
- my ($val, $min, $max) = @$feature;
- my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
- push @{$P{$name}}, $val;
- }
- }
-
- # create new moses.ini decoder config file by cloning and overriding the original one
- open(INI,$infn) or die "Can't read $infn";
- delete($P{"config"}); # never output
- print "Saving new config to: $outfn\n";
- open(OUT,"> $outfn") or die "Can't write $outfn";
- print OUT "# MERT optimized configuration\n";
- print OUT "# decoder $___DECODER\n";
- print OUT "# BLEU $bleu_achieved on dev $___DEV_F\n";
-  print OUT "# written before running iteration $iteration\n";
- print OUT "# finished ".`date`;
- my $line = <INI>;
- while(1) {
- last unless $line;
-
- # skip until hit [parameter]
- if ($line !~ /^\[(.+)\]\s*$/) {
- $line = <INI>;
- print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
- next;
- }
-
- # parameter name
- my $parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
- print OUT "[$parameter]\n";
-
- # change parameter, if new values
- if (defined($P{$parameter})) {
- # write new values
- foreach (@{$P{$parameter}}) {
- print OUT $_."\n";
- }
- delete($P{$parameter});
- # skip until new parameter, only write comments
- while($line = <INI>) {
- print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
- last if $line =~ /^\[/;
- last unless $line;
- }
- next;
- }
-
- # unchanged parameter, write old
- while($line = <INI>) {
- last if $line =~ /^\[/;
- print OUT $line;
- }
- }
-
- # write all additional parameters
- foreach my $parameter (keys %P) {
- print OUT "\n[$parameter]\n";
- foreach (@{$P{$parameter}}) {
- print OUT $_."\n";
- }
- }
-
- close(INI);
- close(OUT);
- print STDERR "Saved: $outfn\n";
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-sub ensure_full_path {
- my $PATH = shift;
- return $PATH if $PATH =~ /^\//;
- $PATH = `pwd`."/".$PATH;
- $PATH =~ s/[\r\n]//g;
- $PATH =~ s/\/\.\//\//g;
- $PATH =~ s/\/+/\//g;
- my $sanity = 0;
- while($PATH =~ /\/\.\.\// && $sanity++<10) {
- $PATH =~ s/\/+/\//g;
- $PATH =~ s/\/[^\/]+\/\.\.\//\//g;
- }
- $PATH =~ s/\/[^\/]+\/\.\.$//;
- $PATH =~ s/\/+$//;
- return $PATH;
-}
-
-
-
-
-sub scan_config {
- my $ini = shift;
- my $inishortname = $ini; $inishortname =~ s/^.*\///; # for error reporting
- my $lambda_counts = shift;
-  # we get pre-filled counts, because some lambdas are always needed (word penalty, for instance)
-  # as we walk through the ini file, we record how many extra lambdas we need
-  # and finally, we report it
-
- # in which field (counting from zero) is the filename to check?
- my %where_is_filename = (
- "ttable-file" => 3,
- "generation-file" => 3,
- "lmodel-file" => 3,
- "distortion-file" => 0,
- );
- # by default, each line of each section means one lambda, but some sections
- # explicitly state a custom number of lambdas
- my %where_is_lambda_count = (
- "ttable-file" => 2,
- "generation-file" => 2,
- );
-
- open INI, $ini or die "Can't read $ini";
- my $section = undef; # name of the section we are reading
- my $shortname = undef; # the corresponding short name
- my $nr = 0;
- my $error = 0;
- my %defined_files;
- my %defined_steps; # check the ini file for compatible mapping steps and actually defined files
- while (<INI>) {
- $nr++;
- next if /^\s*#/; # skip comments
- if (/^\[([^\]]*)\]\s*$/) {
- $section = $1;
- $shortname = $TABLECONFIG2ABBR{$section};
- next;
- }
- if (defined $section && $section eq "mapping") {
- # keep track of mapping steps used
- $defined_steps{$1}++ if /^([TG])/;
- }
- if (defined $section && defined $where_is_filename{$section}) {
- # this ini section is relevant to lambdas
- chomp;
- my @flds = split / +/;
- my $fn = $flds[$where_is_filename{$section}];
- if (defined $fn && $fn !~ /^\s+$/) {
- # this is a filename! check it
- if ($fn !~ /^\//) {
- $error = 1;
- print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
- }
- if (! -s $fn) {
- $error = 1;
- print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
- }
-      # remember the number of files used, to know how many lambdas we need
- die "No short name was defined for section $section!"
- if ! defined $shortname;
-
- # how many lambdas does this model need?
- # either specified explicitly, or the default, i.e. one
- my $needlambdas = defined $where_is_lambda_count{$section} ? $flds[$where_is_lambda_count{$section}] : 1;
-
- print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;
- $lambda_counts->{$shortname}+=$needlambdas;
- if (!defined $___LAMBDA && (!defined $default_triples->{$shortname} || scalar(@{$default_triples->{$shortname}}) != $needlambdas)) {
- print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for "
- .scalar(@{$default_triples->{$shortname}})." weights. Cannot use the default, you must supply lambdas by hand.\n";
- $error = 1;
- }
- $defined_files{$shortname}++;
- }
- }
- }
- die "$inishortname: File was empty!" if !$nr;
- close INI;
- for my $pair (qw/T=tm=translation G=g=generation/) {
- my ($tg, $shortname, $label) = split /=/, $pair;
- $defined_files{$shortname} = 0 if ! defined $defined_files{$shortname};
- $defined_steps{$tg} = 0 if ! defined $defined_steps{$tg};
- if ($defined_files{$shortname} != $defined_steps{$tg}) {
- print STDERR "$inishortname: You defined $defined_files{$shortname} files for $label but use $defined_steps{$tg} in [mapping]!\n";
- $error = 1;
- }
- }
- exit(1) if $error;
- return ($lambda_counts, \%defined_files);
-}
-
diff --git a/scripts/training/phrase-extract/Makefile b/scripts/training/phrase-extract/Makefile
deleted file mode 100644
index 21b254702..000000000
--- a/scripts/training/phrase-extract/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-all: extract score
-
-extract: tables-core.o
-
-score: tables-core.o
diff --git a/scripts/training/phrase-extract/extract.cpp b/scripts/training/phrase-extract/extract.cpp
deleted file mode 100644
index fdd6ae92d..000000000
--- a/scripts/training/phrase-extract/extract.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-using namespace std;
-
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-#include <assert.h>
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) {_IS.getline(_LINE, _SIZE, _DELIM); if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear();}
-#define LINE_MAX_LENGTH 10000
-
-class SentenceAlignment {
- public:
- vector<string> english;
- vector<string> foreign;
- vector<int> alignedCountF;
- vector< vector<int> > alignedToE;
-
- int create( char[], char[], char[], int );
- // void clear() { delete(alignment); };
-};
-
-void extract( SentenceAlignment & );
-void addPhrase( SentenceAlignment &, int, int, int, int );
-vector<string> tokenize( char [] );
-bool isAligned ( SentenceAlignment &, int, int );
-
-ofstream extractFile;
-ofstream extractFileInv;
-ofstream extractFileOrientation;
-int maxPhraseLength;
-int phraseCount = 0;
-char* fileNameExtract;
-bool orientationFlag;
-
-int main(int argc, char* argv[])
-{
- cerr << "PhraseExtract v1.3.0, written by Philipp Koehn\n"
- << "phrase extraction from an aligned parallel corpus\n";
- time_t starttime = time(NULL);
-
- if (argc != 6 && argc != 7) {
- cerr << "syntax: phrase-extract en de align extract max-length [orientation]\n";
- exit(1);
- }
- char* &fileNameE = argv[1];
- char* &fileNameF = argv[2];
- char* &fileNameA = argv[3];
- fileNameExtract = argv[4];
- maxPhraseLength = atoi(argv[5]);
- orientationFlag = (argc == 7);
- if (orientationFlag) cerr << "(also extracting orientation)\n";
-
- // string fileNameE = "/data/nlp/koehn/europarl-v2/models/de-en/model/aligned.en";
- // string fileNameF = "/data/nlp/koehn/europarl-v2/models/de-en/model/aligned.de";
- // string fileNameA = "/data/nlp/koehn/europarl-v2/models/de-en/model/aligned.grow-diag-final";
-
- ifstream eFile;
- ifstream fFile;
- ifstream aFile;
- eFile.open(fileNameE);
- fFile.open(fileNameF);
- aFile.open(fileNameA);
- istream *eFileP = &eFile;
- istream *fFileP = &fFile;
- istream *aFileP = &aFile;
-
- // string fileNameExtract = "/data/nlp/koehn/europarl-v2/models/de-en/model/new-extract";
-
- int i=0;
- while(true) {
- i++;
- if (i%10000 == 0) cerr << "." << flush;
- char englishString[LINE_MAX_LENGTH];
- char foreignString[LINE_MAX_LENGTH];
- char alignmentString[LINE_MAX_LENGTH];
- SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n');
- if (eFileP->eof()) break;
- SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n');
- SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n');
- SentenceAlignment sentence;
- // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
- if (sentence.create( englishString, foreignString, alignmentString, i ))
- extract(sentence);
- }
-
- eFile.close();
- fFile.close();
- aFile.close();
- extractFile.close();
- extractFileInv.close();
-}
-
-void extract( SentenceAlignment &sentence ) {
- int countE = sentence.english.size();
- int countF = sentence.foreign.size();
-
- // check alignments for english phrase startE...endE
- for(int startE=0;startE<countE;startE++) {
- for(int endE=startE;
- (endE<countE && endE<startE+maxPhraseLength);
- endE++) {
-
- int minF = 9999;
- int maxF = -1;
- vector< int > usedF = sentence.alignedCountF;
- for(int ei=startE;ei<=endE;ei++) {
- for(int i=0;i<sentence.alignedToE[ei].size();i++) {
- int fi = sentence.alignedToE[ei][i];
- // cout << "point (" << fi << ", " << ei << ")\n";
- if (fi<minF) { minF = fi; }
- if (fi>maxF) { maxF = fi; }
- usedF[ fi ]--;
- }
- }
-
- // cout << "f projected ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
-
- if (maxF >= 0 && // aligned to any foreign words at all
- maxF-minF < maxPhraseLength) { // foreign phrase within limits
-
- // check if foreign words are aligned to out of bound english words
- bool out_of_bounds = false;
- for(int fi=minF;fi<=maxF && !out_of_bounds;fi++)
- if (usedF[fi]>0) {
- // cout << "ouf of bounds: " << fi << "\n";
- out_of_bounds = true;
- }
-
- // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
- if (!out_of_bounds)
- // start point of foreign phrase may retreat over unaligned
- for(int startF=minF;
- (startF>=0 &&
- startF>maxF-maxPhraseLength && // within length limit
- (startF==minF || sentence.alignedCountF[startF]==0)); // unaligned
- startF--)
- // end point of foreign phrase may advance over unaligned
- for(int endF=maxF;
- (endF<countF &&
- endF<startF+maxPhraseLength && // within length limit
- (endF==maxF || sentence.alignedCountF[endF]==0)); // unaligned
- endF++)
- addPhrase(sentence,startE,endE,startF,endF);
- }
- }
- }
-}
-
-void addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF ) {
- // foreign
- // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
- if (phraseCount % 10000000 == 0) {
- if (phraseCount>0) {
- extractFile.close();
- extractFileInv.close();
- if (orientationFlag) extractFileOrientation.close();
- }
- char part[10];
- sprintf(part,".part%04d",phraseCount/10000000);
- string fileNameExtractPart = string(fileNameExtract) + part;
- string fileNameExtractInvPart = string(fileNameExtract) + ".inv" + part;
- string fileNameExtractOrientationPart = string(fileNameExtract) + ".o" + part;
- extractFile.open(fileNameExtractPart.c_str());
- extractFileInv.open(fileNameExtractInvPart.c_str());
- if (orientationFlag) extractFileOrientation.open(fileNameExtractOrientationPart.c_str());
- }
- phraseCount++;
-
- for(int fi=startF;fi<=endF;fi++) {
- extractFile << sentence.foreign[fi] << " ";
- if (orientationFlag) extractFileOrientation << sentence.foreign[fi] << " ";
- }
- extractFile << "||| ";
- if (orientationFlag) extractFileOrientation << "||| ";
-
- // english
- for(int ei=startE;ei<=endE;ei++) {
- extractFile << sentence.english[ei] << " ";
- extractFileInv << sentence.english[ei] << " ";
- if (orientationFlag) extractFileOrientation << sentence.english[ei] << " ";
- }
- extractFile << "|||";
- extractFileInv << "||| ";
- if (orientationFlag) extractFileOrientation << "||| ";
-
- // foreign (for inverse)
- for(int fi=startF;fi<=endF;fi++)
- extractFileInv << sentence.foreign[fi] << " ";
- extractFileInv << "|||";
-
- // alignment
- for(int ei=startE;ei<=endE;ei++)
- for(int i=0;i<sentence.alignedToE[ei].size();i++) {
- int fi = sentence.alignedToE[ei][i];
- extractFile << " " << fi-startF << "-" << ei-startE;
- extractFileInv << " " << ei-startE << "-" << fi-startF;
- }
-
- if (orientationFlag) {
-
- // orientation to previous E
- bool connectedLeftTop = isAligned( sentence, startF-1, startE-1 );
- bool connectedRightTop = isAligned( sentence, endF+1, startE-1 );
- if ( connectedLeftTop && !connectedRightTop)
- extractFileOrientation << "mono";
- else if (!connectedLeftTop && connectedRightTop)
- extractFileOrientation << "swap";
- else
- extractFileOrientation << "other";
-
- // orientation to following E
- bool connectedLeftBottom = isAligned( sentence, startF-1, endE+1 );
- bool connectedRightBottom = isAligned( sentence, endF+1, endE+1 );
- if ( connectedLeftBottom && !connectedRightBottom)
- extractFileOrientation << " swap";
- else if (!connectedLeftBottom && connectedRightBottom)
- extractFileOrientation << " mono";
- else
- extractFileOrientation << " other";
- }
-
- extractFile << "\n";
- extractFileInv << "\n";
- if (orientationFlag) extractFileOrientation << "\n";
-}
-
-bool isAligned ( SentenceAlignment &sentence, int fi, int ei ) {
- if (ei == -1 && fi == -1) return true;
- if (ei <= -1 || fi <= -1) return false;
- if (ei == sentence.english.size() && fi == sentence.foreign.size()) return true;
- if (ei >= sentence.english.size() || fi >= sentence.foreign.size()) return false;
- for(int i=0;i<sentence.alignedToE[ei].size();i++)
- if (sentence.alignedToE[ei][i] == fi) return true;
- return false;
-}
-
-
-int SentenceAlignment::create( char englishString[], char foreignString[], char alignmentString[], int sentenceID ) {
- english = tokenize( englishString );
- foreign = tokenize( foreignString );
- // alignment = new bool[foreign.size()*english.size()];
- // alignment = (bool**) calloc(english.size()*foreign.size(),sizeof(bool)); // is this right?
-
- if (english.size() == 0 || foreign.size() == 0) {
-    cerr << "no english (" << english.size() << ") or foreign (" << foreign.size() << ") words in sentence " << sentenceID << endl;
- cerr << "E: " << englishString << endl << "F: " << foreignString << endl;
- return 0;
- }
- // cout << "english.size = " << english.size() << endl;
- // cout << "foreign.size = " << foreign.size() << endl;
-
- // cout << "xxx\n";
- for(int i=0; i<foreign.size(); i++) {
- // cout << "i" << i << endl;
- alignedCountF.push_back( 0 );
- }
- for(int i=0; i<english.size(); i++) {
- vector< int > dummy;
- alignedToE.push_back( dummy );
- }
- // cout << "\nscanning...\n";
-
- vector<string> alignmentSequence = tokenize( alignmentString );
- for(int i=0; i<alignmentSequence.size(); i++) {
- int e,f;
- // cout << "scaning " << alignmentSequence[i].c_str() << endl;
- if (! sscanf(alignmentSequence[i].c_str(), "%d-%d", &f, &e)) {
-      cerr << "WARNING: " << alignmentSequence[i] << " is a bad alignment point in sentence " << sentenceID << endl; 
- cerr << "E: " << englishString << endl << "F: " << foreignString << endl;
- return 0;
- }
- // cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << f << ", " << e << endl;
- if (e >= english.size() || f >= foreign.size()) {
- cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << f << ", " << e << ") out of bounds (" << foreign.size() << ", " << english.size() << ")\n";
- cerr << "E: " << englishString << endl << "F: " << foreignString << endl;
- return 0;
- }
- alignedToE[e].push_back( f );
- alignedCountF[f]++;
- }
- return 1;
-}
-
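extract.cpp above implements the usual phrase-pair consistency criterion: an English span and a foreign span may be extracted only if every alignment link touching either span stays inside both, and at least one link exists. A small Python sketch of that criterion (an equivalent check, not a line-by-line port of extract()); the toy alignment is invented:

def consistent(links, e1, e2, f1, f2):
    # links: (foreign, english) index pairs; spans are inclusive
    touched = [(f, e) for (f, e) in links if e1 <= e <= e2 or f1 <= f <= f2]
    return (len(touched) > 0 and
            all(e1 <= e <= e2 and f1 <= f <= f2 for (f, e) in touched))

links = [(0, 0), (1, 2), (2, 1)]         # invented toy alignment
print(consistent(links, 1, 2, 1, 2))     # True: all touching links stay inside
print(consistent(links, 0, 1, 0, 2))     # False: link (1,2) leaves the English span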
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
deleted file mode 100644
index 372893978..000000000
--- a/scripts/training/phrase-extract/score.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-using namespace std;
-
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "tables-core.h"
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) {_IS.getline(_LINE, _SIZE, _DELIM); if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear();}
-#define LINE_MAX_LENGTH 10000
-
-class PhraseAlignment {
-public:
- int english, foreign;
- vector< vector<int> > alignedToE;
- vector< vector<int> > alignedToF;
-
- void create( char*, int );
- void clear();
- bool equals( PhraseAlignment );
-};
-
-class LexicalTable {
-public:
- map< WORD_ID, map< WORD_ID, double > > ltable;
- void load( char[] );
-};
-
-vector<string> tokenize( char [] );
-
-void processPhrasePairs( vector< PhraseAlignment > & );
-
-ofstream phraseTableFile;
-
-Vocabulary vcbE;
-Vocabulary vcbF;
-LexicalTable lexTable;
-PhraseTable phraseTableE;
-PhraseTable phraseTableF;
-bool inverseFlag;
-
-int main(int argc, char* argv[])
-{
- cerr << "PhraseScore v1.2.1, written by Philipp Koehn\n"
- << "phrase scoring methods for extracted phrases\n";
- time_t starttime = time(NULL);
-
- if (argc != 4 && argc != 5) {
- cerr << "syntax: phrase-score extract lex phrase-table [inverse]\n";
- exit(1);
- }
- char* &fileNameExtract = argv[1];
- char* &fileNameLex = argv[2];
- char* &fileNamePhraseTable = argv[3];
- inverseFlag = false;
- if (argc > 4) {
- inverseFlag = true;
- cerr << "using inverse mode\n";
- }
- // char[] fileNameExtract& = "/data/nlp/koehn/europarl-v2/models/de-en/model/new-extract.sorted";
- // string fileNameLex = "/data/nlp/koehn/europarl-v2/models/de-en/model/lex.f2n";
- // string fileNamePhraseTable = "/data/nlp/koehn/europarl-v2/models/de-en/model/new-phrase-table-half.f2n";
-
- // lexical translation table
- lexTable.load( fileNameLex );
-
- // sorted phrase extraction file
- ifstream extractFile;
-
- extractFile.open(fileNameExtract);
- if (extractFile.fail()) {
- cerr << "ERROR: could not open extract file " << fileNameExtract << endl;
- exit(1);
- }
- istream *extractFileP = &extractFile;
-
- // output file: phrase translation table
- phraseTableFile.open(fileNamePhraseTable);
- if (phraseTableFile.fail()) {
-    cerr << "ERROR: could not open phrase table file "
-         << fileNamePhraseTable << endl;
- exit(1);
- }
-
- // loop through all extracted phrase translations
- int lastForeign = -1;
- vector< PhraseAlignment > phrasePairsWithSameF;
- int i=0;
- int fileCount = 0;
- while(true) {
- if (extractFileP->eof()) break;
- if (++i % 100000 == 0) cerr << "." << flush;
- char line[LINE_MAX_LENGTH];
- SAFE_GETLINE((*extractFileP), line, LINE_MAX_LENGTH, '\n');
- // if (fileCount>0)
- if (extractFileP->eof()) break;
- PhraseAlignment phrasePair;
- phrasePair.create( line, i );
- if (lastForeign >= 0 && lastForeign != phrasePair.foreign) {
- processPhrasePairs( phrasePairsWithSameF );
- for(int j=0;j<phrasePairsWithSameF.size();j++)
- phrasePairsWithSameF[j].clear();
- phrasePairsWithSameF.clear();
- phraseTableE.clear();
- phraseTableF.clear();
- phrasePair.clear(); // process line again, since phrase tables flushed
- phrasePair.create( line, i );
- }
- lastForeign = phrasePair.foreign;
- phrasePairsWithSameF.push_back( phrasePair );
- }
- processPhrasePairs( phrasePairsWithSameF );
- phraseTableFile.close();
-}
-
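-// Score all phrase pairs that share the same foreign phrase: the phrase
-// translation probability is count(e,f)/count(f), and the lexical weight is
-// computed from the most frequent alignment observed for each english phrase.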
-void processPhrasePairs( vector< PhraseAlignment > &phrasePair ) {
- map<int, int> countE;
- map<int, int> alignmentE;
- int totalCount = 0;
- int currentCount = 0;
- int maxSameCount = 0;
- int maxSame = -1;
- int old = -1;
- for(int i=0;i<phrasePair.size();i++) {
- if (i>0) {
- if (phrasePair[old].english == phrasePair[i].english) {
- if (! phrasePair[i].equals( phrasePair[old] )) {
- if (currentCount > maxSameCount) {
- maxSameCount = currentCount;
- maxSame = i-1;
- }
- currentCount = 0;
- }
- }
- else {
- // wrap up old E
- if (currentCount > maxSameCount) {
- maxSameCount = currentCount;
- maxSame = i-1;
- }
-
- alignmentE[ phrasePair[old].english ] = maxSame;
- // if (maxSameCount != totalCount)
- // cout << "max count is " << maxSameCount << "/" << totalCount << endl;
-
- // get ready for new E
- totalCount = 0;
- currentCount = 0;
- maxSameCount = 0;
- maxSame = -1;
- }
- }
- countE[ phrasePair[i].english ]++;
- old = i;
- currentCount++;
- totalCount++;
- }
-
- // wrap up old E
- if (currentCount > maxSameCount) {
- maxSameCount = currentCount;
- maxSame = phrasePair.size()-1;
- }
- alignmentE[ phrasePair[old].english ] = maxSame;
- // if (maxSameCount != totalCount)
- // cout << "max count is " << maxSameCount << "/" << totalCount << endl;
-
- // output table
- typedef map< int, int >::iterator II;
- PHRASE phraseF = phraseTableF.getPhrase( phrasePair[0].foreign );
- for(II i = countE.begin(); i != countE.end(); i++) {
- // cout << "\tp( " << i->first << " | " << phrasePair[0].foreign << " ; " << phraseF.size() << " ) = ...\n";
-
- // foreign phrase (unless inverse)
- if (! inverseFlag) {
- for(int j=0;j<phraseF.size();j++)
- phraseTableFile << vcbF.getWord( phraseF[j] ) << " ";
- phraseTableFile << "||| ";
- }
-
- // english phrase
- PHRASE phraseE = phraseTableE.getPhrase( i->first );
- for(int j=0;j<phraseE.size();j++)
- phraseTableFile << vcbE.getWord( phraseE[j] ) << " ";
- phraseTableFile << "||| ";
-
- // foreign phrase (if inverse)
- if (inverseFlag) {
- for(int j=0;j<phraseF.size();j++)
- phraseTableFile << vcbF.getWord( phraseF[j] ) << " ";
- phraseTableFile << "||| ";
- }
-
- // phrase translation probability
- phraseTableFile << ((double) i->second / (double) phrasePair.size());
-
- // lexical translation probability
- double lexScore = 1;
- int null = vcbF.getWordID("NULL");
- PhraseAlignment &current = phrasePair[ alignmentE[ i->first ] ];
- for(int ei=0;ei<phraseE.size();ei++) { // all english words have to be explained
- if (current.alignedToE[ ei ].size() == 0)
-      lexScore *= lexTable.ltable[ null ][ phraseE[ ei ] ]; // by NULL if necessary
- else {
- double thisWordScore = 0;
- for(int j=0;j<current.alignedToE[ ei ].size();j++) {
- thisWordScore += lexTable.ltable[ phraseF[current.alignedToE[ ei ][ j ] ] ][ phraseE[ ei ] ];
- // cout << "lex" << j << "(" << vcbE.getWord( phraseE[ ei ] ) << "|" << vcbF.getWord( phraseF[current.alignedToE[ ei ][ j ] ] ) << ")=" << lexTable.ltable[ phraseF[current.alignedToE[ ei ][ j ] ] ][ phraseE[ ei ] ] << " ";
- }
- lexScore *= thisWordScore / (double)current.alignedToE[ ei ].size();
- }
- // cout << " => " << lexScore << endl;
- }
- phraseTableFile << " " << lexScore;
-
- // model 1 score
-
- // zens&ney lexical score
-
- phraseTableFile << endl;
- }
-}
-
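-// Parse one extract-file line of the form "foreign ||| english ||| f-e f-e ...",
-// store the phrases in the global phrase tables and record the
-// word-to-word alignment in alignedToE / alignedToF.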
-void PhraseAlignment::create( char line[], int lineID ) {
- vector< string > token = tokenize( line );
- int item = 1;
- PHRASE phraseF, phraseE;
- for (int j=0; j<token.size(); j++) {
- if (token[j] == "|||") item++;
- else {
- if (item == 1)
- phraseF.push_back( vcbF.storeIfNew( token[j] ) );
- else if (item == 2)
- phraseE.push_back( vcbE.storeIfNew( token[j] ) );
- else if (item == 3) {
- int e,f;
- sscanf(token[j].c_str(), "%d-%d", &f, &e);
- if (e >= phraseE.size() || f >= phraseF.size()) {
- cerr << "WARNING: sentence " << lineID << " has alignment point (" << f << ", " << e << ") out of bounds (" << phraseF.size() << ", " << phraseE.size() << ")\n"; }
- else {
- if (alignedToE.size() == 0) {
- vector< int > dummy;
- for(int i=0;i<phraseE.size();i++)
- alignedToE.push_back( dummy );
- for(int i=0;i<phraseF.size();i++)
- alignedToF.push_back( dummy );
- foreign = phraseTableF.storeIfNew( phraseF );
- english = phraseTableE.storeIfNew( phraseE );
- }
- alignedToE[e].push_back( f );
- alignedToF[f].push_back( e );
- }
- }
- }
- }
-}
-
-void PhraseAlignment::clear() {
- for(int i=0;i<alignedToE.size();i++)
- alignedToE[i].clear();
- for(int i=0;i<alignedToF.size();i++)
- alignedToF[i].clear();
- alignedToE.clear();
- alignedToF.clear();
-}
-
-bool PhraseAlignment::equals( PhraseAlignment other ) {
- if (other.english != english) return false;
- if (other.foreign != foreign) return false;
- PHRASE phraseE = phraseTableE.getPhrase( english );
- PHRASE phraseF = phraseTableF.getPhrase( foreign );
- for(int i=0;i<phraseE.size();i++) {
- if (alignedToE[i].size() != other.alignedToE[i].size()) return false;
- for(int j=0; j<alignedToE[i].size(); j++) {
- if (alignedToE[i][j] != other.alignedToE[i][j]) return false;
- }
- }
- for(int i=0;i<phraseF.size();i++) {
- if (alignedToF[i].size() != other.alignedToF[i].size()) return false;
- for(int j=0; j<alignedToF[i].size(); j++) {
- if (alignedToF[i][j] != other.alignedToF[i][j]) return false;
- }
- }
- return true;
-}
-
-void LexicalTable::load( char *fileName ) {
- cerr << "Loading lexical translation table from " << fileName;
- ifstream inFile;
- inFile.open(fileName);
- if (inFile.fail()) {
- cerr << " - ERROR: could not open file\n";
- exit(1);
- }
- istream *inFileP = &inFile;
-
- char line[LINE_MAX_LENGTH];
-
- int i=0;
- while(true) {
- i++;
- if (i%100000 == 0) cerr << "." << flush;
- SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n');
- if (inFileP->eof()) break;
-
- vector<string> token = tokenize( line );
- if (token.size() != 3) {
- cerr << "line " << i << " in " << fileName << " has wrong number of tokens, skipping:\n" <<
- token.size() << " " << token[0] << " " << line << endl;
- continue;
- }
-
- double prob = atof( token[2].c_str() );
- WORD_ID wordE = vcbE.storeIfNew( token[0] );
- WORD_ID wordF = vcbF.storeIfNew( token[1] );
- ltable[ wordF ][ wordE ] = prob;
- }
- cerr << endl;
-}
diff --git a/scripts/training/phrase-extract/tables-core.cpp b/scripts/training/phrase-extract/tables-core.cpp
deleted file mode 100644
index 1f961955a..000000000
--- a/scripts/training/phrase-extract/tables-core.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//#include "beammain.h"
-#include "tables-core.h"
-
-#define TABLE_LINE_MAX_LENGTH 1000
-#define UNKNOWNSTR "UNK"
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) {_IS.getline(_LINE, _SIZE, _DELIM); if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear();}
-
-// as in beamdecoder/tables.cpp
-vector<string> tokenize( char input[] ) {
- vector< string > token;
- bool betweenWords = true;
- int start=0;
- int i=0;
- for(; input[i] != '\0'; i++) {
- bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
- if (!isSpace && betweenWords) {
- start = i;
- betweenWords = false;
- }
- else if (isSpace && !betweenWords) {
- token.push_back( string( input+start, i-start ) );
- betweenWords = true;
- }
- }
- if (!betweenWords)
- token.push_back( string( input+start, i-start ) );
- return token;
-}
-
-WORD_ID Vocabulary::storeIfNew( WORD word ) {
- if( lookup.find( word ) != lookup.end() )
- return lookup[ word ];
-
- WORD_ID id = vocab.size();
- vocab.push_back( word );
- lookup[ word ] = id;
- return id;
-}
-
-WORD_ID Vocabulary::getWordID( WORD word ) {
- if( lookup.find( word ) == lookup.end() )
- return 0;
- return lookup[ word ];
-}
-
-PHRASE_ID PhraseTable::storeIfNew( PHRASE phrase ) {
- if( lookup.find( phrase ) != lookup.end() )
- return lookup[ phrase ];
-
- PHRASE_ID id = phraseTable.size();
- phraseTable.push_back( phrase );
- lookup[ phrase ] = id;
- return id;
-}
-
-PHRASE_ID PhraseTable::getPhraseID( PHRASE phrase ) {
- if( lookup.find( phrase ) == lookup.end() )
- return 0;
- return lookup[ phrase ];
-}
-
-void PhraseTable::clear() {
- lookup.clear();
- phraseTable.clear();
-}
-
-void DTable::init() {
- for(int i = -10; i<10; i++)
- dtable[i] = -abs( i );
-}
-
-void DTable::load( string fileName ) {
- ifstream inFile;
- inFile.open(fileName.c_str());
- istream *inFileP = &inFile;
-
- char line[TABLE_LINE_MAX_LENGTH];
- int i=0;
- while(true) {
- i++;
- SAFE_GETLINE((*inFileP), line, TABLE_LINE_MAX_LENGTH, '\n');
- if (inFileP->eof()) break;
-
- vector<string> token = tokenize( line );
- if (token.size() < 2) {
- cerr << "line " << i << " in " << fileName << " too short, skipping\n";
- continue;
- }
-
- int d = atoi( token[0].c_str() );
- double prob = log( atof( token[1].c_str() ) );
- dtable[ d ] = prob;
- }
-}
-
-double DTable::get( int distortion ) {
- if (dtable.find( distortion ) == dtable.end())
- return log( 0.00001 );
- return dtable[ distortion ];
-}
diff --git a/scripts/training/phrase-extract/tables-core.h b/scripts/training/phrase-extract/tables-core.h
deleted file mode 100644
index fb995339a..000000000
--- a/scripts/training/phrase-extract/tables-core.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _TABLES_H
-#define _TABLES_H
-
-using namespace std;
-
-#include <iostream>
-#include <fstream>
-#include <assert.h>
-#include <stdlib.h>
-#include <string>
-#include <queue>
-#include <map>
-#include <cmath>
-
-vector<string> tokenize( char[] );
-
-typedef string WORD;
-typedef unsigned int WORD_ID;
-
-class Vocabulary {
- public:
- map<WORD, WORD_ID> lookup;
- vector< WORD > vocab;
- WORD_ID storeIfNew( WORD );
- WORD_ID getWordID( WORD );
- inline WORD &getWord( WORD_ID id ) { return vocab[ id ]; }
-};
-
-typedef vector< WORD_ID > PHRASE;
-typedef unsigned int PHRASE_ID;
-
-class PhraseTable {
- public:
- map< PHRASE, PHRASE_ID > lookup;
- vector< PHRASE > phraseTable;
- PHRASE_ID storeIfNew( PHRASE );
- PHRASE_ID getPhraseID( PHRASE );
- void clear();
- inline PHRASE &getPhrase( const PHRASE_ID id ) { return phraseTable[ id ]; }
-};
-
-typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;
-
-class TTable {
- public:
- map< PHRASE_ID, vector< pair< PHRASE_ID, double > > > ttable;
- map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
-};
-
-class DTable {
- public:
- map< int, double > dtable;
- void init();
- void load( string );
- double get( int );
-};
-
-#endif
diff --git a/scripts/training/postprocess-lopar.perl b/scripts/training/postprocess-lopar.perl
deleted file mode 100755
index f7b0ea2a0..000000000
--- a/scripts/training/postprocess-lopar.perl
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-use utf8;
-
-my $out = shift @ARGV or die "Please specify the output file path (will be appended with .lemma, .morph, .words and .factored)";
-
-my $wc = 0;
-my $uc = 0;
-open OUT, ">$out.factored" or die "Couldn't open joined";
-open M, ">$out.morph" or die "Couldn't open morph";
-open L, ">$out.lemma" or die "Couldn't open lemma";
-open S, ">$out.words" or die "Couldn't open surface";
-open P, ">$out.pos" or die "Couldn't open pos";
-my $lc = 0;
-while (my $l =<STDIN>) {
- chomp $l;
- $lc++;
- if ($lc % 1000 == 0) {print "$lc\n";}
- my @ls = (); my @ms = (); my @ss = (); my @js = (); my @ps = ();
- my @ws = split /\s+/, $l;
- foreach my $w (@ws) {
- $wc++;
- my ($surface, $morph, $lemma);
-
- if ($w =~ /^(.+)_([^_]+)_(.+)$/o) {
- ($surface, $morph, $lemma) = ($1, $2, $3);
- } else {
- print "can't parse: $w\n";
- next;
- }
- #next unless (defined $surface && !($surface eq ''));
- if (!defined $lemma) { $lemma=$surface; }
- if (!defined $morph) { $morph = 'NN.Neut.Cas.Sg'; }
- if ($lemma eq '<NUM>' || $lemma eq '<ORD>') {
- $lemma = $surface;
- }
-
- $surface =~ tr/A-Z/a-z/;
- $surface =~ tr/À-Þ/à-þ/;
-
- if ($lemma eq '<unknown>') {
- $uc++;
- $lemma = $surface;
- if ($surface =~ /ungen$/o) {
- $lemma =~ s/en$//o;
- $morph = 'NN.Fem.Cas.Pl';
- } elsif ($surface =~ /schaften$/o) {
- $lemma =~ s/en$//o;
- $morph = 'NN.Fem.Cas.Pl';
- } elsif ($surface =~ /eiten$/o) {
- $lemma =~ s/en$//o;
- $morph = 'NN.Fem.Cas.Pl';
- } elsif ($surface =~ /eit/o) {
- $morph = 'NN.Fem.Cas.Sg';
- } elsif ($surface =~ /schaft/o) {
- $morph = 'NN.Fem.Cas.Sg';
- } elsif ($surface =~ /ung/o) {
- $morph = 'NN.Fem.Cas.Sg';
- } elsif ($surface =~ /ismus$/o) {
-      $morph = 'NN.Masc.Cas.Sg';
- }
- } else {
- if ($lemma =~ /\|/o) {
- my ($l, @rest) = split /\|/o, $lemma;
- $lemma = $l;
- }
- }
- my ($pos, @xs) = split /\./, $morph;
- $morph = join '.', @xs;
- if (!defined $morph || $morph eq '') {
- $morph = '-';
- }
-# if (defined($lemma) && defined($morph) && defined($surface)) {
- push @js, "$surface|$morph|$lemma";
- push @ls, $lemma;
- push @ms, $morph;
- push @ss, $surface;
- push @ps, $pos;
-# }
- }
- print OUT join(' ', @js) . "\n";
- print M join(' ', @ms) . "\n";
- print L join(' ', @ls) . "\n";
- print S join(' ', @ss) . "\n";
- print P join(' ', @ps) . "\n";
-}
-close OUT;
-
-print "word count: $wc\nunknown lemmas: $uc\nratio: " . $uc/$wc . "\n";
-
diff --git a/scripts/training/reduce_combine.pl b/scripts/training/reduce_combine.pl
deleted file mode 100755
index bbc4559cc..000000000
--- a/scripts/training/reduce_combine.pl
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/perl
-# given a pathname to a factored corpus, a list of (numeric) factors to keep
-# and a list of (labelled) factors to attach from factors/<basename>.label
-# produces new corpus on stdout
-
-use strict;
-use warnings;
-use Getopt::Long;
-use IO::File;
-use File::Basename;
-
-binmode(STDIN, ":utf8");
-binmode(STDOUT, ":utf8");
-binmode(STDERR, ":utf8");
-
-my $factordir = "factors";
-GetOptions(
- "factordir=s" => \$factordir,
-);
-my $corppathname = shift;
-
-my @requested_factors = split /[\+,]/, join("+", @ARGV);
-die "usage: reduce_combine.pl corpusfile 0 add_factor_label1 2 add_factor_label2 ..."
- if !defined $corppathname || 0 == scalar @requested_factors;
-
-my @addfactors = grep { ! /^[0-9]+$/ } @requested_factors;
-# these are the labelled factors we need to load;
-
-
-open CORP, $corppathname or die "Can't read $corppathname";
-binmode(CORP, ":utf8");
-
-my $corpdn = dirname($corppathname);
-my $corpbn = basename($corppathname);
-my %streams = map {
- my $fn = "$corpdn/$factordir/$corpbn.$_";
- my $stream = IO::File->new($fn, "<:utf8");
- die "Can't read '$fn'" if !defined $stream;
- ( $_, $stream ); # define a mapping factorlabel->stream
-} @addfactors;
-
-my $nr=0;
-while (<CORP>) {
- $nr++;
- print STDERR "." if $nr % 10000 == 0;
- print STDERR "($nr)" if $nr % 100000 == 0;
- chomp;
- my @intokens = split / /;
- # load lines of corresponding streams and ensure equal number of words
- my %lines_of_extratoks;
- foreach my $factor (keys %streams) {
- my $line = readline($streams{$factor});
- die "Additional factor file $factor contains too few sentences!"
- if !defined $line;
- chomp($line);
- my @toks = split / /, $line;
- die "Incompatible number of words in factor $factor on line $nr."
- if $#toks != $#intokens;
- $lines_of_extratoks{$factor} = \@toks;
- }
-
- # for every token, print the factors in the order as user wished
- for(my $i=0; $i<=$#intokens; $i++) {
- my $token = $intokens[$i];
- my @outtoken = ();
- my @factors = split /\|/, $token;
- # print STDERR "Token: $token\n";
- foreach my $name (@requested_factors) {
- my $f = undef;
- if ($name =~ /^[0-9]+$/o) {
- # numeric factors should be copied from original corpus
- $f = $factors[$name];
- die "Missed factor $name in $token on line $nr"
- if !defined $f || $f eq "";
- } else {
- # named factors should be obtained from the streams
- $f = $lines_of_extratoks{$name}->[$i];
- die "Missed factor $name on line $nr"
- if !defined $f || $f eq "";
- }
- # print STDERR " Factor $name: $f\n";
- push @outtoken, $f;
- }
- print " " if $i != 0;
- print join("|", @outtoken);
- }
- print "\n";
-}
-close CORP;
-print STDERR "Done.\n";
-
-
-
diff --git a/scripts/training/symal/Makefile b/scripts/training/symal/Makefile
deleted file mode 100644
index c7ced672f..000000000
--- a/scripts/training/symal/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-CPP=g++ -g
-CC=gcc -g
-
-all: symal
-
-clean:
- rm -f *.o
-
-cmd.o: cmd.c cmd.h
- $(CC) -c -o cmd.o cmd.c
-
-symal: symal.cpp cmd.o
- $(CPP) -o $@ $(@).cpp cmd.o
diff --git a/scripts/training/symal/cmd.c b/scripts/training/symal/cmd.c
deleted file mode 100644
index ee607a254..000000000
--- a/scripts/training/symal/cmd.c
+++ /dev/null
@@ -1,642 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "cmd.h"
-
-static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
-};
-
-#ifdef NEEDSTRDUP
-char *strdup();
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
-#define LINSIZ 10240
-#define MAXPARAM 256
-
-static char *GetLine(),
- **str2array();
-static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
-
-static Cmd_T cmds[MAXPARAM+1];
-static char *SepString = " \t\n";
-
-#if defined(__STDC__)
-#include <stdarg.h>
-int DeclareParams(char *ParName, ...)
-#else
-#include <varargs.h>
-int DeclareParams(ParName, va_alist)
-char *ParName;
-va_dcl
-#endif
-{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
-#if defined(__STDC__)
- va_start(args, ParName);
-#else
- va_start(args);
-#endif
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
-}
-
-int GetParams(n, a, CmdFileName)
-int *n;
-char ***a;
-char *CmdFileName;
-{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
-
-#ifdef MSDOS
-#define PATHSEP '\\'
- char *dot = NULL;
-#else
-#define PATHSEP '/'
-#endif
-
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
-#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
-#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
-#ifdef MSDOS
- if(dot) *dot = '.';
-#endif
- free(Line);
- return 0;
-}
-
-int PrintParams(ValFlag, fp)
-int ValFlag;
-FILE *fp;
-{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
-}
-
-int SPrintParams(a, pfx)
-char ***a,
- *pfx;
-{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
-}
-
-static int CmdError(opt)
-char *opt;
-{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
-	fprintf(stderr, "This program expects the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
-}
-
-static int PrintParam(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
-}
-
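-/* read one logical line: skip '#' comment lines, strip leading blanks,
-   and join lines that end with a backslash */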
-static char *GetLine(fp, n, Line)
-FILE *fp;
-int n;
-char *Line;
-{
- int j,
- l,
- offs=0;
-
- for(;;) {
- if(!fgets(Line+offs, n-offs, fp)) {
- return NULL;
- }
- if(Line[offs]=='#') continue;
- l = strlen(Line+offs)-1;
- Line[offs+l] = 0;
- for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
- ;
- if(l<1) continue;
- if(j > offs) {
- char *s = Line+offs,
- *q = Line+j;
-
- while((*s++=*q++))
- ;
- }
- if(Line[offs+l-1]=='\\') {
- offs += l;
- Line[offs-1] = ' ';
- } else {
- break;
- }
- }
- return Line;
-}
-
-static int Scan(ProgName, cmds, Line)
-char *ProgName,
- *Line;
-Cmd_T *cmds;
-{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
-}
-
-static int SetParam(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
-}
-
-static int SetEnum(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
-}
-
-static int SetSubrange(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetGte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetStrArray(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
-}
-
-static int SetLte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int EnumError(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
-}
-
-static int GteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int LteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int SubrangeError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
-}
-
-static int PrintEnum(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
-}
-
-static int PrintStrArray(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
-}
-
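-/* split s on the separator characters in sep (or the default SepString)
-   into a newly allocated, NULL-terminated array of token strings */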
-static char **str2array(s, sep)
-char *s,
- *sep;
-{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
-}
diff --git a/scripts/training/symal/cmd.h b/scripts/training/symal/cmd.h
deleted file mode 100644
index c6fa57e71..000000000
--- a/scripts/training/symal/cmd.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-#if !defined(CMD_H)
-
-#define CMD_H
-
-#define CMDDOUBLETYPE 1
-#define CMDENUMTYPE 2
-#define CMDINTTYPE 3
-#define CMDSTRINGTYPE 4
-#define CMDSUBRANGETYPE 5
-#define CMDGTETYPE 6
-#define CMDLTETYPE 7
-#define CMDSTRARRAYTYPE 8
-#define CMDBOOLTYPE 9
-
-typedef struct {
- char *Name;
- int Idx;
-} Enum_T;
-
-typedef struct {
- int Type;
- char *Name,
- *ArgStr;
- void *Val,
- *p;
-} Cmd_T;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__STDC__)
-int DeclareParams(char *, ...);
-#else
-int DeclareParams();
-#endif
-
-int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
-
-#ifdef __cplusplus
-}
-#endif
-#endif
-
-
-
diff --git a/scripts/training/symal/giza2bal.pl b/scripts/training/symal/giza2bal.pl
deleted file mode 100755
index b3cc68a1d..000000000
--- a/scripts/training/symal/giza2bal.pl
+++ /dev/null
@@ -1,96 +0,0 @@
-#! /usr/bin/perl
-
-#Converts direct and inverted alignments into a more compact
-#bi-alignment format. It optionally reads the counting file
-#produced by giza containing the frequency of each training sentence.
-
-#Copyright Marcello Federico, November 2004
-
-($cnt,$dir,$inv)=();
-
-while ($w=shift @ARGV){
- $dir=shift(@ARGV),next if $w eq "-d";
- $inv=shift(@ARGV),next if $w eq "-i";
- $cnt=shift(@ARGV),next if $w eq "-c";
-}
-
-if (!$dir || !$inv){
- print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
- print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
- exit(0);
-}
-
-$|=1;
-
-open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
-open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $inv\n";
-
-if ($cnt){
-open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $cnt\n";
-}
-
-
-sub ReadBiAlign{
- local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
- local($dummy,$n);
-
- chop($c=<$fd0>); ## count
- $dummy=<$fd0>; ## header
- $dummy=<$fd0>; ## header
- $c=1 if !$c;
-
- $dummy=<$fd1>; ## header
- chop($s1=<$fd1>);
- chop($t1=<$fd1>);
-
- $dummy=<$fd2>; ## header
- chop($s2=<$fd2>);
- chop($t2=<$fd2>);
-
- @a=@b=();
-
- #get target statistics
- $n=1;
- $t1=~s/NULL \(\{(( \d+)*) \}\)//;
- while ($t1=~s/(\S+) \(\{(( \d+)*) \}\)//){
- grep($a[$_]=$n,split(/ /,$2));
- $n++;
- }
-
- $m=1;
- $t2=~s/NULL \(\{(( \d+)*) \}\)//;
- while ($t2=~s/(\S+) \(\{(( \d+)*) \}\)//){
- grep($b[$_]=$m,split(/ /,$2));
- $m++;
- }
-
- $M=split(/ /,$s1);
- $N=split(/ /,$s2);
-
- return 0 if $m != ($M+1) || $n != ($N+1);
-
- for ($j=1;$j<$m;$j++){
- $a[$j]=0 if !$a[$j];
- }
-
- for ($i=1;$i<$n;$i++){
- $b[$i]=0 if !$b[$i];
- }
-
-
- return 1;
-}
-
-$skip=0;
-while(!eof(DIR)){
-
- if (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c))
- {
- print "$c\n";
- print $#a," $src \# @a[1..$#a]\n";
- print $#b," $tgt \# @b[1..$#b]\n";
- }
- else{
- print STDERR "." if !(++$skip % 1000);
- }
-};
diff --git a/scripts/training/symal/symal.cpp b/scripts/training/symal/symal.cpp
deleted file mode 100644
index 1a6d762a2..000000000
--- a/scripts/training/symal/symal.cpp
+++ /dev/null
@@ -1,394 +0,0 @@
-using namespace std;
-
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <list>
-#include <vector>
-#include <set>
-#include <algorithm>
-#include "cmd.h"
-
-#define MAX_WORD 100 //maximum length of source/target strings
-#define MAX_M 200 //maximum length of source strings
-#define MAX_N 200 //maximum length of target strings
-
-#define UNION 1
-#define INTERSECT 2
-#define GROW 3
-#define BOOL_YES 1
-#define BOOL_NO 0
-
-#define END_ENUM { (char*)0, 0 }
-
-static Enum_T AlignEnum [] = {
-{ "union", UNION },
-{ "u", UNION },
-{ "intersect", INTERSECT},
-{ "i", INTERSECT},
-{ "grow", GROW },
-{ "g", GROW },
-
- END_ENUM
-};
-
-static Enum_T BoolEnum [] = {
- { "true", BOOL_YES },
- { "yes", BOOL_YES },
- { "y", BOOL_YES },
- { "false", BOOL_NO },
- { "no", BOOL_NO },
- { "n", BOOL_NO },
- END_ENUM
-};
-
-
-
-// global variables and constants
-
-int* fa; //counters of covered foreign positions
-int* ea; //counters of covered english positions
-int** A; //alignment matrix with information symmetric/direct/inverse alignments
-
-int verbose=0;
-
-//read an alignment pair from the input stream.
-
-int getals(fstream& inp,int& m, int *a,int& n, int *b)
-{
- char w[MAX_WORD], dummy[10];
- int i,j,freq;
-
- if (inp >> freq){
- //target sentence
- inp >> n; assert(n<MAX_N);
- for (i=1;i<=n;i++){
- inp >> setw(MAX_WORD) >> w;
- assert(strlen(w)<MAX_WORD-1);
- }
-
- inp >> dummy; //# separator
- // inverse alignment
- for (i=1;i<=n;i++) inp >> b[i];
-
- //source sentence
- inp >> m; assert(m<MAX_M);
- for (j=1;j<=m;j++){
- inp >> setw(MAX_WORD) >> w;
- assert(strlen(w)<MAX_WORD-1);
- }
-
- inp >> dummy; //# separator
-
- // direct alignment
- for (j=1;j<=m;j++) {
- inp >> a[j];
- assert(0<=a[j] && a[j]<=n);
- }
-
-    //check inverse alignment
- for (i=1;i<=n;i++)
- assert(0<=b[i] && b[i]<=m);
-
- return 1;
-
- }
- else
- return 0;
-};
-
-
-//compute union alignment
-int prunionalignment(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
-
- for (int i=1;i<=n;i++)
- if (b[i] && a[b[i]]!=i)
- sout << b[i]-1 << "-" << i-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- str.replace(str.length()-1,1,"\n");
-
- out << str;
-
- return 1;
-}
-
-
-
-//Compute Intersection Alignment
-
-int printersect(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j] && b[a[j]]==j)
- sout << j-1 << "-" << a[j]-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- str.replace(str.length()-1,1,"\n");
-
- out << str;
-
- return 1;
-}
-
-
-//Compute Grow Diagonal Alignment
-//Nice property: you will never introduce more points
-//than the union alignment. Hence, you will always be able
-//to represent the grow alignment as the union of a
-//direct and an inverse alignment
-
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false){
-
- ostringstream sout;
-
- vector <pair <int,int> > neighbors; //neighbors
-
- pair <int,int> entry;
-
- neighbors.push_back(make_pair(-1,-0));
- neighbors.push_back(make_pair(0,-1));
- neighbors.push_back(make_pair(1,0));
- neighbors.push_back(make_pair(0,1));
-
-
- if (diagonal){
- neighbors.push_back(make_pair(-1,-1));
- neighbors.push_back(make_pair(-1,1));
- neighbors.push_back(make_pair(1,-1));
- neighbors.push_back(make_pair(1,1));
- }
-
-
- int i,j,o;
-
-
- //covered foreign and english positions
-
- memset(fa,0,(m+1)*sizeof(int));
- memset(ea,0,(n+1)*sizeof(int));
-
- //matrix to quickly check if one point is in the symmetric
- //alignment (value=2), direct alignment (=1) and inverse alignment
-
- for (int i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int));
-
- set <pair <int,int> > currentpoints; //symmetric alignment
- set <pair <int,int> > unionalignment; //union alignment
-
- pair <int,int> point; //variable to store points
- set<pair <int,int> >::const_iterator k; //iterator over sets
-
- //fill in the alignments
- for (j=1;j<=m;j++){
- if (a[j]){
- unionalignment.insert(make_pair(a[j],j));
- if (b[a[j]]==j){
- fa[j]=1;ea[a[j]]=1;
- A[a[j]][j]=2;
- currentpoints.insert(make_pair(a[j],j));
- }
- else
- A[a[j]][j]=-1;
- }
- }
-
- for (i=1;i<=n;i++)
- if (b[i] && a[b[i]]!=i){ //not intersection
- unionalignment.insert(make_pair(i,b[i]));
- A[i][b[i]]=1;
- }
-
-
- int added=1;
-
- while (added){
- added=0;
- ///scan the current alignment
- for (k=currentpoints.begin();k!=currentpoints.end();k++){
- //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
- for (o=0;o<neighbors.size();o++){
- //cout << "go over check all neighbors\n";
- point.first=k->first+neighbors[o].first;
- point.second=k->second+neighbors[o].second;
- //cout << point.second-1 << " " << point.first-1 << "\n";
- //check if neighbor is inside 'matrix'
- if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
- //check if neighbor is in the unionalignment alignment
- if (b[point.first]==point.second || a[point.second]==point.first){
- //cout << "In unionalignment ";cout.flush();
- //check if it connects at least one uncovered word
- if (!(ea[point.first] && fa[point.second]))
- {
- //insert point in currentpoints!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- ea[point.first]=1; fa[point.second]=1;
- added=1;
- //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
- }
- }
- }
- }
- }
-
- if (final){
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
-
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==-1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
- }
-
-
- for (k=currentpoints.begin();k!=currentpoints.end();k++)
- sout << k->second-1 << "-" << k->first-1 << " ";
-
-
- //fix the last " "
- string str = sout.str();
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
- return 1;
-
- return 1;
-}
-
-
-
-//Main file here
-
-
-int main(int argc, char** argv){
-
-int alignment=0;
-char* input="/dev/stdin";
-char* output="/dev/stdout";
-int diagonal=false;
-int final=false;
-int bothuncovered=false;
-
-
- DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
- "alignment", CMDENUMTYPE, &alignment, AlignEnum,
- "d", CMDENUMTYPE, &diagonal, BoolEnum,
- "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
- "f", CMDENUMTYPE, &final, BoolEnum,
- "final", CMDENUMTYPE, &final, BoolEnum,
- "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "i", CMDSTRINGTYPE, &input,
- "o", CMDSTRINGTYPE, &output,
- "v", CMDENUMTYPE, &verbose, BoolEnum,
- "verbose", CMDENUMTYPE, &verbose, BoolEnum,
-
- (char *)NULL);
-
- GetParams(&argc, &argv, (char*) NULL);
-
- if (alignment==0){
- cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
-         << "Input file or stdin must be in .bal format (see script giza2bal.pl).\n";
-
- exit(1);
-
- }
-
- fstream inp(input,ios::in);
- fstream out(output,ios::out);
-
- if (!inp.is_open()){
- cerr << "cannot open " << input << "\n";
- exit(1);
- }
-
- if (!out.is_open()){
- cerr << "cannot open " << output << "\n";
- exit(1);
- }
-
-
- int a[MAX_M],b[MAX_N],m,n;
- fa=new int[MAX_M+1];
- ea=new int[MAX_N+1];
-
-
- A=new int *[MAX_N+1];
- for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1];
-
- switch (alignment){
- case UNION:
- cerr << "symal: computing union alignment\n";
- while(getals(inp,m,a,n,b)) prunionalignment(out,m,a,n,b);
- break;
- case INTERSECT:
- cerr << "symal: computing intersect alignment\n";
- while(getals(inp,m,a,n,b)) printersect(out,m,a,n,b);
- break;
- case GROW:
- cerr << "symal: computing grow alignment: diagonal ("
- << diagonal << ") final ("<< final << ")"
-             << " both-uncovered (" << bothuncovered << ")\n";
-
- while(getals(inp,m,a,n,b))
- printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
-
- break;
- default:
- exit(1);
- }
-
- delete [] fa; delete [] ea;
- for (int i=1;i<=MAX_N;i++) delete [] A[i];
- delete [] A;
-
- exit(0);
-}
diff --git a/scripts/training/train-factored-phrase-model.perl b/scripts/training/train-factored-phrase-model.perl
deleted file mode 100755
index ff49993e8..000000000
--- a/scripts/training/train-factored-phrase-model.perl
+++ /dev/null
@@ -1,1397 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-use Getopt::Long "GetOptions";
-
-# Train Factored Phrase Model
-# (c) 2006 Philipp Koehn
-# with contributions from other JHU WS participants
-# Train a phrase model from a parallel corpus
-
-# -----------------------------------------------------
-$ENV{"LC_ALL"} = "C";
-
-my($_ROOT_DIR,$_CORPUS_DIR,$_GIZA_E2F,$_GIZA_F2E,$_MODEL_DIR,$_CORPUS,$_FIRST_STEP,$_LAST_STEP,$_F,$_E,$_MAX_PHRASE_LENGTH,$_LEXICAL_DIR,$_NO_LEXICAL_WEIGHTING,$_VERBOSE,$_ALIGNMENT,@_LM,$_EXTRACT_FILE,$_GIZA_OPTION,$_HELP,$_PARTS,$_DIRECTION,$_ONLY_PRINT_GIZA,$_REORDERING,$_REORDERING_SMOOTH,$_ALIGNMENT_FACTORS,$_TRANSLATION_FACTORS,$_REORDERING_FACTORS,$_GENERATION_FACTORS,$_DECODING_STEPS,$_PARALLEL, $SCRIPTS_ROOTDIR);
-
-my $debug = 0; # debug this script, do not delete any files in debug mode
-
-$_HELP = 1
- unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
- 'corpus-dir=s' => \$_CORPUS_DIR,
- 'corpus=s' => \$_CORPUS,
- 'f=s' => \$_F,
- 'e=s' => \$_E,
- 'giza-e2f=s' => \$_GIZA_E2F,
- 'giza-f2e=s' => \$_GIZA_F2E,
- 'max-phrase-length=i' => \$_MAX_PHRASE_LENGTH,
- 'lexical-dir=s' => \$_LEXICAL_DIR,
- 'no-lexical-weighting' => \$_NO_LEXICAL_WEIGHTING,
- 'model-dir=s' => \$_MODEL_DIR,
- 'extract-file=s' => \$_EXTRACT_FILE,
- 'alignment=s' => \$_ALIGNMENT,
- 'verbose' => \$_VERBOSE,
- 'first-step=i' => \$_FIRST_STEP,
- 'last-step=i' => \$_LAST_STEP,
- 'giza-option=s' => \$_GIZA_OPTION,
- 'parallel' => \$_PARALLEL,
- 'lm=s' => \@_LM,
- 'help' => \$_HELP,
- 'debug' => \$debug,
- 'parts=i' => \$_PARTS,
- 'direction=i' => \$_DIRECTION,
- 'only-print-giza' => \$_ONLY_PRINT_GIZA,
- 'reordering=s' => \$_REORDERING,
- 'reordering-smooth=s' => \$_REORDERING_SMOOTH,
- 'alignment-factors=s' => \$_ALIGNMENT_FACTORS,
- 'translation-factors=s' => \$_TRANSLATION_FACTORS,
- 'reordering-factors=s' => \$_REORDERING_FACTORS,
- 'generation-factors=s' => \$_GENERATION_FACTORS,
- 'decoding-steps=s' => \$_DECODING_STEPS,
- 'scripts-root-dir=s' => \$SCRIPTS_ROOTDIR,
- );
-
-if ($_HELP) {
- print "Train Phrase Model
-
-Steps: (--first-step to --last-step)
-(1) prepare corpus
-(2) run GIZA
-(3) align words
-(4) learn lexical translation
-(5) extract phrases
-(6) score phrases
-(7) learn reordering model
-(8) learn generation model
-(9) create decoder config file
-
-For more, please check the manual or contact koehn\@inf.ed.ac.uk\n";
- exit(1);
-}
-
-if (!defined $SCRIPTS_ROOTDIR) {
- $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"};
- die "Please set SCRIPTS_ROOTDIR or specify --scripts-root-dir" if !defined $SCRIPTS_ROOTDIR;
-}
-print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
-
-
-# these variables may have to be adapted to local paths
-my $BINDIR = "/export/ws06osmt/bin";
-my $GIZA = "$BINDIR/GIZA++";
-my $SNT2COOC = "$BINDIR/snt2cooc.out";
-my $PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract";
-my $SYMAL = "$SCRIPTS_ROOTDIR/training/symal/symal";
-my $GIZA2BAL = "$SCRIPTS_ROOTDIR/training/symal/giza2bal.pl";
-my $PHRASE_SCORE = "$SCRIPTS_ROOTDIR/training/phrase-extract/score";
-my $MKCLS = "$BINDIR/mkcls";
-my $ZCAT = "zcat";
-my $BZCAT = "bzcat";
-
-
-# set variables to defaults or from options
-my $___ROOT_DIR = ".";
-$___ROOT_DIR = $_ROOT_DIR if $_ROOT_DIR;
-my $___CORPUS_DIR = $___ROOT_DIR."/corpus";
-$___CORPUS_DIR = $_CORPUS_DIR if $_CORPUS_DIR;
-die("use --corpus to specify corpus") unless $_CORPUS || ($_FIRST_STEP && $_FIRST_STEP>1);
-my $___CORPUS = $_CORPUS;
-
-# foreign/English language extension
-die("use --f to specify foreign language") unless $_F;
-die("use --e to specify English language") unless $_E;
-my $___F = $_F;
-my $___E = $_E;
-
-# vocabulary files in corpus dir
-my $___VCB_E = $___CORPUS_DIR."/".$___E.".vcb";
-my $___VCB_F = $___CORPUS_DIR."/".$___F.".vcb";
-
-# GIZA generated files
-my $___GIZA = $___ROOT_DIR."/giza";
-my $___GIZA_E2F = $___GIZA.".".$___E."-".$___F;
-my $___GIZA_F2E = $___GIZA.".".$___F."-".$___E;
-$___GIZA_E2F = $_GIZA_E2F if $_GIZA_E2F;
-$___GIZA_F2E = $_GIZA_F2E if $_GIZA_F2E;
-my $___GIZA_OPTION = "";
-$___GIZA_OPTION = $_GIZA_OPTION if $_GIZA_OPTION;
-
-# alignment heuristic
-my $___ALIGNMENT = "grow-diag-final";
-$___ALIGNMENT = $_ALIGNMENT if $_ALIGNMENT;
-my $___NOTE_ALIGNMENT_DROPS = 1;
-
-# model dir and extract file
-my $___MODEL_DIR = $___ROOT_DIR."/model";
-$___MODEL_DIR = $_MODEL_DIR if $_MODEL_DIR;
-my $___EXTRACT_FILE = $___MODEL_DIR."/extract";
-$___EXTRACT_FILE = $_EXTRACT_FILE if $_EXTRACT_FILE;
-
-
-my $___MAX_PHRASE_LENGTH = 7;
-my $___LEXICAL_WEIGHTING = 1;
-my $___LEXICAL_DIR = $___MODEL_DIR;
-$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
-$___LEXICAL_WEIGHTING = 0 if $_NO_LEXICAL_WEIGHTING;
-$___LEXICAL_DIR = $_LEXICAL_DIR if $_LEXICAL_DIR;
-
-my $___VERBOSE = 0;
-my $___FIRST_STEP = 1;
-my $___LAST_STEP = 9;
-$___VERBOSE = $_VERBOSE if $_VERBOSE;
-$___FIRST_STEP = $_FIRST_STEP if $_FIRST_STEP;
-$___LAST_STEP = $_LAST_STEP if $_LAST_STEP;
-
-my @___LM = ();
-if ($___LAST_STEP == 9) {
- die "use --lm factor:order:filename to specify at least one language model"
- if scalar @_LM == 0;
- foreach my $lm (@_LM) {
- my ($f, $order, $filename) = split /:/, $lm, 3;
- die "Wrong format of --lm. Expected: --lm factor:order:filename"
- if $f !~ /^[0-9]+$/ || $order !~ /^[0-9]+$/ || !defined $filename;
- die "Language model file not found or empty: $filename"
- if ! -s $filename;
- push @___LM, [ $f, $order, $filename ];
- }
-}
-
-my $___PARTS = 1;
-$___PARTS = $_PARTS if $_PARTS;
-
-my $___DIRECTION = 0;
-$___DIRECTION = $_DIRECTION if $_DIRECTION;
-
-# don't fork
-my $___NOFORK = !defined $_PARALLEL;
-
-my $___ONLY_PRINT_GIZA = 0;
-$___ONLY_PRINT_GIZA = 1 if $_ONLY_PRINT_GIZA;
-
-# Reordering model (esp. lexicalized)
-my $___REORDERING = "distance";
-$___REORDERING = $_REORDERING if $_REORDERING;
-my $___REORDERING_SMOOTH = 0.5;
-$___REORDERING_SMOOTH = $_REORDERING_SMOOTH if $_REORDERING_SMOOTH;
-my %REORDERING_MODEL;
-my $REORDERING_LEXICAL = 0; # flag for building lexicalized reordering models
-foreach my $r (split(/,/,$___REORDERING)) {
- if (!( $r eq "orientation-f" ||
- $r eq "orientation-fe" ||
- $r eq "orientation-bidirectional-f" ||
- $r eq "orientation-bidirectional-fe" ||
- $r eq "monotonicity-f" ||
- $r eq "monotonicity-fe" ||
- $r eq "monotonicity-bidirectional-f" ||
- $r eq "monotonicity-bidirectional-fe" ||
- $r eq "distance")) {
-    print STDERR "unknown reordering type: $r\n";
- exit(1);
- }
- if ($r ne "distance") { $REORDERING_LEXICAL = 1; }
- $REORDERING_MODEL{$r}++;
- if ($r =~ /-f$/) { $REORDERING_MODEL{"f"}++; }
- if ($r =~ /-fe$/) { $REORDERING_MODEL{"fe"}++; }
-}
-my ($mono_previous_f,$swap_previous_f,$other_previous_f);
-my ($mono_previous_fe,$swap_previous_fe,$other_previous_fe);
-my ($mono_following_f,$swap_following_f,$other_following_f);
-my ($mono_following_fe,$swap_following_fe,$other_following_fe);
-my ($f_current,$e_current);
-
-### Factored translation models
-my $___ALIGNMENT_FACTORS = "0-0";
-$___ALIGNMENT_FACTORS = $_ALIGNMENT_FACTORS if defined($_ALIGNMENT_FACTORS);
-die("format for alignment factors is \"0-0\" or \"0,1,2-0,1\", you provided $___ALIGNMENT_FACTORS\n") if $___ALIGNMENT_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*$/;
-
-my $___TRANSLATION_FACTORS = undef;
-$___TRANSLATION_FACTORS = $_TRANSLATION_FACTORS if defined($_TRANSLATION_FACTORS);
-die("format for translation factors is \"0-0\" or \"0-0+1-1\" or \"0-0+0,1-0,1\", you provided $___TRANSLATION_FACTORS\n")
- if defined $___TRANSLATION_FACTORS && $___TRANSLATION_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;
-
-my $___REORDERING_FACTORS = undef;
-$___REORDERING_FACTORS = $_REORDERING_FACTORS if defined($_REORDERING_FACTORS);
-die("format for reordering factors is \"0-0\" or \"0-0+1-1\" or \"0-0+0,1-0,1\", you provided $___REORDERING_FACTORS\n")
- if defined $___REORDERING_FACTORS && $___REORDERING_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;
-
-my $___GENERATION_FACTORS = undef;
-$___GENERATION_FACTORS = $_GENERATION_FACTORS if defined($_GENERATION_FACTORS);
-die("format for generation factors is \"0-1\" or \"0-1+0-2\" or \"0-1+0,1-1,2\", you provided $___GENERATION_FACTORS\n")
- if defined $___GENERATION_FACTORS && $___GENERATION_FACTORS !~ /^\d+(\,\d+)*\-\d+(\,\d+)*(\+\d+(\,\d+)*\-\d+(\,\d+)*)*$/;
-
-my $___DECODING_STEPS = $_DECODING_STEPS;
-die("use --decoding-steps to specify decoding steps") if ( !defined $_DECODING_STEPS && $___LAST_STEP>=9 && $___FIRST_STEP<=9);
-die("format for decoding steps is \"t0,g0,t1,g1\", you provided $___DECODING_STEPS\n")
- if defined $___DECODING_STEPS && $___DECODING_STEPS !~ /^[tg]\d+(,[tg]\d+)*$/;
-
-my ($factor,$factor_e,$factor_f);
-
-my $alignment_id;
-
-### MAIN
-
-&prepare() if $___FIRST_STEP==1;
-&run_giza() if $___FIRST_STEP<=2 && $___LAST_STEP>=2;
-&word_align() if $___FIRST_STEP<=3 && $___LAST_STEP>=3;
-&get_lexical_factored() if $___FIRST_STEP<=4 && $___LAST_STEP>=4;
-&extract_phrase_factored() if $___FIRST_STEP<=5 && $___LAST_STEP>=5;
-&score_phrase_factored() if $___FIRST_STEP<=6 && $___LAST_STEP>=6;
-&get_reordering_factored() if $___FIRST_STEP<=7 && $___LAST_STEP>=7;
-&get_generation_factored() if $___FIRST_STEP<=8 && $___LAST_STEP>=8;
-&create_ini() if $___LAST_STEP==9;
-
-### (1) PREPARE CORPUS
-
-sub prepare {
- print STDERR "(1) preparing corpus @ ".`date`;
- safesystem("mkdir -p $___CORPUS_DIR") or die;
-
- print STDERR "(1.0) selecting factors @ ".`date`;
- my ($factor_f,$factor_e) = split(/\-/,$___ALIGNMENT_FACTORS);
- my $corpus = $___CORPUS.".".$___ALIGNMENT_FACTORS;
- if ($___NOFORK) {
- &reduce_factors($___CORPUS.".".$___F,$corpus.".".$___F,$factor_f);
- &reduce_factors($___CORPUS.".".$___E,$corpus.".".$___E,$factor_e);
-
- &make_classes($corpus.".".$___F,$___VCB_F.".classes");
- &make_classes($corpus.".".$___E,$___VCB_E.".classes");
-
- my $VCB_F = &get_vocabulary($corpus.".".$___F,$___VCB_F);
- my $VCB_E = &get_vocabulary($corpus.".".$___E,$___VCB_E);
-
- &numberize_txt_file($VCB_F,$corpus.".".$___F,
- $VCB_E,$corpus.".".$___E,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt");
-
- &numberize_txt_file($VCB_E,$corpus.".".$___E,
- $VCB_F,$corpus.".".$___F,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt");
- } else {
- print "Forking...\n";
- my $pid = fork();
- die "couldn't fork" unless defined $pid;
- if (!$pid) {
- &reduce_factors($___CORPUS.".".$___F,$corpus.".".$___F,$factor_f);
- exit 0;
- } else {
- &reduce_factors($___CORPUS.".".$___E,$corpus.".".$___E,$factor_e);
- }
- waitpid($pid, 0);
- my $pid2 = 0;
- $pid = fork();
- die "couldn't fork" unless defined $pid;
- if (!$pid) {
- &make_classes($corpus.".".$___F,$___VCB_F.".classes");
- exit 0;
- } # parent
- $pid2 = fork();
- die "couldn't fork again" unless defined $pid2;
- if (!$pid2) { #child
- &make_classes($corpus.".".$___E,$___VCB_E.".classes");
- exit 0;
- }
-
- my $VCB_F = &get_vocabulary($corpus.".".$___F,$___VCB_F);
- my $VCB_E = &get_vocabulary($corpus.".".$___E,$___VCB_E);
-
- &numberize_txt_file($VCB_F,$corpus.".".$___F,
- $VCB_E,$corpus.".".$___E,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt");
-
- &numberize_txt_file($VCB_E,$corpus.".".$___E,
- $VCB_F,$corpus.".".$___F,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt");
- waitpid($pid2, 0);
- waitpid($pid, 0);
- }
-}
-
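-# copy a factored corpus, keeping only the requested factor indices
-# (pipe-separated fields) of every token; the file is reused if it already exists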
-sub reduce_factors {
- my ($full,$reduced,$factors) = @_;
- if (-e $reduced) {
- print STDERR "already $reduced in place, reusing\n";
- return;
- }
- # my %INCLUDE;
- # foreach my $factor (split(/,/,$factors)) {
- # $INCLUDE{$factor} = 1;
- # }
- my @INCLUDE = sort {$a <=> $b} split(/,/,$factors);
- open(IN,$full) or die "Can't read $full";
- open(OUT,">".$reduced) or die "Can't write $reduced";
- my $nr = 0;
- while(<IN>) {
- $nr++;
- print STDERR "." if $nr % 10000 == 0;
- print STDERR "($nr)" if $nr % 100000 == 0;
- chomp; s/ +/ /g; s/^ //; s/ $//;
- my $first = 1;
- foreach (split) {
- my @FACTOR = split(/\|/);
- print OUT " " unless $first;
- $first = 0;
- my $first_factor = 1;
- foreach my $outfactor (@INCLUDE) {
- print OUT "|" unless $first_factor;
- $first_factor = 0;
- my $out = $FACTOR[$outfactor];
- die "Couldn't find factor $outfactor in token \"$_\" in $full LINE $nr" if !defined $out;
- print OUT $out;
- }
- # for(my $factor=0;$factor<=$#FACTOR;$factor++) {
- # next unless defined($INCLUDE{$factor});
- # print OUT "|" unless $first_factor;
- # $first_factor = 0;
- # print OUT $FACTOR[$factor];
- # }
- }
- print OUT "\n";
- }
- print STDERR "\n";
- close(OUT);
- close(IN);
-}
-
-sub make_classes {
- my ($corpus,$classes) = @_;
- my $cmd = "$MKCLS -c50 -n2 -p$corpus -V$classes opt";
- print STDERR "(1.1) running mkcls @ ".`date`."$cmd\n";
- safesystem("$cmd"); # ignoring the wrong exit code from mkcls (not dying)
-}
-
-sub get_vocabulary {
- return unless $___LEXICAL_WEIGHTING;
- my($corpus,$vcb) = @_;
- print STDERR "(1.2) creating vcb file $vcb @ ".`date`;
-
- my %WORD;
- open(TXT,$corpus) or die "Can't read $corpus";
- while(<TXT>) {
- chop;
- foreach (split) { $WORD{$_}++; }
- }
- close(TXT);
-
- my @NUM;
- foreach my $word (keys %WORD) {
- my $vcb_with_number = sprintf("%07d %s",$WORD{$word},$word);
- push @NUM,$vcb_with_number;
- }
-
- my %VCB;
- open(VCB,">$vcb") or die "Can't write $vcb";
- print VCB "1\tUNK\t0\n";
- my $id=2;
- foreach (reverse sort @NUM) {
- my($count,$word) = split;
- printf VCB "%d\t%s\t%d\n",$id,$word,$count;
- $VCB{$word} = $id;
- $id++;
- }
- close(VCB);
-
- return \%VCB;
-}
-
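-# numberize_txt_file: convert a sentence-aligned file pair into GIZA++ .snt
-# format -- three lines per sentence pair: a count (always 1 here), the
-# numberized sentence from $in_en, then the numberized sentence from $in_de.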
-sub numberize_txt_file {
- my ($VCB_DE,$in_de,$VCB_EN,$in_en,$out) = @_;
- print STDERR "(1.3) numberizing corpus $out @ ".`date`;
- open(IN_DE,$in_de) or die "Can't read $in_de";
- open(IN_EN,$in_en) or die "Can't read $in_en";
- open(OUT,">$out") or die "Can't write $out";
- while(my $de = <IN_DE>) {
- my $en = <IN_EN>;
- print OUT "1\n";
- print OUT &numberize_line($VCB_EN,$en);
- print OUT &numberize_line($VCB_DE,$de);
- }
- close(IN_DE);
- close(IN_EN);
- close(OUT);
-}
-
-sub numberize_line {
- my ($VCB,$txt) = @_;
- chomp($txt);
- my $out = "";
- my $not_first = 0;
- foreach (split(/ /,$txt)) {
- next if $_ eq '';
- $out .= " " if $not_first++;
- print STDERR "Unknown word '$_'\n" unless defined($$VCB{$_});
- $out .= $$VCB{$_};
- }
- return $out."\n";
-}
-
-### (2) RUN GIZA
-
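-# run_giza: run the two GIZA++ alignment jobs, normally forked in parallel.
-# $___DIRECTION restricts training to a single job (1 keeps only the
-# $___GIZA_F2E run, 2 only the $___GIZA_E2F run); $___NOFORK runs the two
-# jobs one after the other.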
-sub run_giza {
- return &run_giza_on_parts if $___PARTS>1;
-
- print STDERR "(2) running giza @ ".`date`;
- if ($___DIRECTION == 1 || $___DIRECTION == 2 || $___NOFORK) {
- &run_single_giza($___GIZA_F2E,$___E,$___F,
- $___VCB_E,$___VCB_F,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt")
- unless $___DIRECTION == 2;
- &run_single_giza($___GIZA_E2F,$___F,$___E,
- $___VCB_F,$___VCB_E,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt")
- unless $___DIRECTION == 1;
- } else {
- my $pid = fork();
- if (!defined $pid) {
- die "Failed to fork";
- }
- if (!$pid) { # i'm the child
- &run_single_giza($___GIZA_F2E,$___E,$___F,
- $___VCB_E,$___VCB_F,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt");
- exit 0; # child exits
- } else { #i'm the parent
- &run_single_giza($___GIZA_E2F,$___F,$___E,
- $___VCB_F,$___VCB_E,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt");
- }
- printf "Waiting on second GIZA process...\n";
- waitpid($pid, 0);
- }
-}
-
-sub run_giza_on_parts {
- print STDERR "(2) running giza on $___PARTS cooc parts @ ".`date`;
- my $size = `cat $___CORPUS_DIR/$___F-$___E-int-train.snt | wc -l`;
- die "Failed to get number of lines in $___CORPUS_DIR/$___F-$___E-int-train.snt"
- if $size == 0;
-
- if ($___DIRECTION == 1 || $___DIRECTION == 2 || $___NOFORK) {
- &run_single_giza_on_parts($___GIZA_F2E,$___E,$___F,
- $___VCB_E,$___VCB_F,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt",$size)
- unless $___DIRECTION == 2;
-
- &run_single_giza_on_parts($___GIZA_E2F,$___F,$___E,
- $___VCB_F,$___VCB_E,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt",$size)
- unless $___DIRECTION == 1;
- } else {
- my $pid = fork();
- if (!defined $pid) {
- die "Failed to fork";
- }
- if (!$pid) { # i'm the child
- &run_single_giza_on_parts($___GIZA_F2E,$___E,$___F,
- $___VCB_E,$___VCB_F,
- $___CORPUS_DIR."/$___F-$___E-int-train.snt",$size);
- exit 0; # child exits
- } else { #i'm the parent
- &run_single_giza_on_parts($___GIZA_E2F,$___F,$___E,
- $___VCB_F,$___VCB_E,
- $___CORPUS_DIR."/$___E-$___F-int-train.snt",$size);
- }
- printf "Waiting on second GIZA process...\n";
- waitpid($pid, 0);
- }
-}
-
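-# run_single_giza_on_parts: for $___PARTS > 1, split the .snt training file
-# into that many chunks on sentence-pair boundaries (three lines per pair)
-# and run snt2cooc on each chunk.  The per-part cooccurrence files are
-# assumed to be sorted by word-id pair, so a k-way minimum merge below
-# yields one sorted list before handing over to run_single_giza.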
-sub run_single_giza_on_parts {
- my($dir,$e,$f,$vcb_e,$vcb_f,$train,$size) = @_;
-
- my $part = 0;
-
- # break up training data into parts
- open(SNT,$train) or die "Can't read $train";
- {
- my $i=0;
- while(<SNT>) {
- $i++;
- if ($i%3==1 && $part < ($___PARTS*$i)/$size && $part<$___PARTS) {
- close(PART) if $part;
- $part++;
- safesystem("mkdir -p $___CORPUS_DIR/part$part") or die;
- open(PART,">$___CORPUS_DIR/part$part/$f-$e-int-train.snt")
- or die "Can't write $___CORPUS_DIR/part$part/$f-$e-int-train.snt";
- }
- print PART $_;
- }
- }
- close(PART);
- close(SNT);
-
- # run snt2cooc in parts
- for(my $i=1;$i<=$___PARTS;$i++) {
- &run_single_snt2cooc("$dir/part$i",$e,$f,$vcb_e,$vcb_f,"$___CORPUS_DIR/part$i/$f-$e-int-train.snt");
- }
-
- # merge parts
- open(COOC,">$dir/$f-$e.cooc") or die "Can't write $dir/$f-$e.cooc";
- my(@PF,@CURRENT);
- for(my $i=1;$i<=$___PARTS;$i++) {
- open($PF[$i],"$dir/part$i/$f-$e.cooc")or die "Can't read $dir/part$i/$f-$e.cooc";
- my $pf = $PF[$i];
- $CURRENT[$i] = <$pf>;
- chop($CURRENT[$i]) if $CURRENT[$i];
- }
-
- while(1) {
- my ($min1,$min2) = (1e20,1e20);
- for(my $i=1;$i<=$___PARTS;$i++) {
- next unless $CURRENT[$i];
- my ($w1,$w2) = split(/ /,$CURRENT[$i]);
- if ($w1 < $min1 || ($w1 == $min1 && $w2 < $min2)) {
- $min1 = $w1;
- $min2 = $w2;
- }
- }
- last if $min1 == 1e20;
- print COOC "$min1 $min2\n";
- for(my $i=1;$i<=$___PARTS;$i++) {
- next unless $CURRENT[$i];
- my ($w1,$w2) = split(/ /,$CURRENT[$i]);
- if ($w1 == $min1 && $w2 == $min2) {
- my $pf = $PF[$i];
- $CURRENT[$i] = <$pf>;
- chop($CURRENT[$i]) if $CURRENT[$i];
- }
- }
- }
- for(my $i=1;$i<=$___PARTS;$i++) {
- close($PF[$i]);
- }
- close(COOC);
-
- # run giza
- &run_single_giza($dir,$e,$f,$vcb_e,$vcb_f,$train);
-}
-
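-# run_single_giza: assemble and run the GIZA++ command line.
-# %GizaDefaultOptions follows the usual GIZA++ switches (presumably the
-# iteration counts for IBM Models 1-4 in m1..m4 and the empty-word
-# probability in p0); any entry can be overridden through $___GIZA_OPTION,
-# given as "key=value" pairs separated by spaces or commas.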
-sub run_single_giza {
- my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
-
- my %GizaDefaultOptions =
- (p0 => .999 ,
- m1 => 5 ,
- m2 => 0 ,
- m3 => 3 ,
- m4 => 3 ,
- o => "giza" ,
- nodumps => 1 ,
- onlyaldumps => 1 ,
- nsmooth => 4 ,
- model1dumpfrequency => 1,
- model4smoothfactor => 0.4 ,
- t => $vcb_f,
- s => $vcb_e,
- c => $train,
- CoocurrenceFile => "$dir/$f-$e.cooc",
- o => "$dir/$f-$e");
-
- if ($___GIZA_OPTION) {
- foreach (split(/[ ,]+/,$___GIZA_OPTION)) {
- my ($option,$value) = split(/=/,$_,2);
- $GizaDefaultOptions{$option} = $value;
- }
- }
-
-  my $GizaOptions = "";
- foreach my $option (sort keys %GizaDefaultOptions){
- my $value = $GizaDefaultOptions{$option} ;
- $GizaOptions .= " -$option $value" ;
- }
-
- &run_single_snt2cooc($dir,$e,$f,$vcb_e,$vcb_f,$train) if $___PARTS == 1;
-
- print STDERR "(2.1b) running giza $f-$e @ ".`date`."$GIZA $GizaOptions\n";
- print "$GIZA $GizaOptions\n";
- return if $___ONLY_PRINT_GIZA;
- safesystem("$GIZA $GizaOptions");
- die "Giza did not produce the output file $dir/$f-$e.A3.final. Is your corpus clean (reasonably-sized sentences)?"
- if ! -e "$dir/$f-$e.A3.final";
- safesystem("rm -f $dir/$f-$e.A3.final.gz") or die;
- safesystem("gzip $dir/$f-$e.A3.final") or die;
-}
-
-sub run_single_snt2cooc {
- my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
- print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n";
- safesystem("mkdir -p $dir") or die;
- print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n";
- safesystem("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc") or die;
-}
-
-### (3) CREATE WORD ALIGNMENT FROM GIZA ALIGNMENTS
-
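-# word_align: pipe the two GIZA++ A3.final alignments through giza2bal.pl
-# and symal to produce the symmetrized alignment.  The $___ALIGNMENT
-# heuristic name (union, intersect, grow, grow-diag-final, ...) is mapped
-# onto symal's -alignment/-diagonal/-final/-both switches below.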
-sub word_align {
-
- print STDERR "(3) generate word alignment @ ".`date`;
- print STDERR "Combining forward and inverted alignment from files:\n";
- print STDERR " $___GIZA_F2E/$___F-$___E.A3.final.{bz2,gz}\n";
- print STDERR " $___GIZA_E2F/$___F-$___E.A3.final.{bz2,gz}\n";
-
- ### build arguments for giza2bal.pl
- my($__ALIGNMENT_CMD,$__ALIGNMENT_INV_CMD);
-
- if (-e "$___GIZA_F2E/$___F-$___E.A3.final.bz2"){
- $__ALIGNMENT_CMD="\"$BZCAT $___GIZA_F2E/$___F-$___E.A3.final.bz2\"";
- } elsif (-e "$___GIZA_F2E/$___F-$___E.A3.final.gz") {
- $__ALIGNMENT_CMD="\"$ZCAT $___GIZA_F2E/$___F-$___E.A3.final.gz\"";
- } else {
- die "Can't read $___GIZA_F2E/$___F-$___E.A3.final.{bz2,gz}\n";
- }
-
- if ( -e "$___GIZA_F2E/$___F-$___E.A3.final.bz2"){
- $__ALIGNMENT_INV_CMD="\"$BZCAT $___GIZA_E2F/$___E-$___F.A3.final.bz2\"";
- }elsif (-e "$___GIZA_F2E/$___F-$___E.A3.final.gz"){
- $__ALIGNMENT_INV_CMD="\"$ZCAT $___GIZA_E2F/$___E-$___F.A3.final.gz\"";
- }else{
- die "Can't read $___GIZA_E2F/$___F-$___E.A3.final.{bz2,gz}\n\n";
- }
-
- safesystem("mkdir -p $___MODEL_DIR") or die;
-
- ### build arguments for symal
- my($__symal_a)="";
- $__symal_a="union" if $___ALIGNMENT eq 'union';
- $__symal_a="intersect" if $___ALIGNMENT=~ /intersect/;
- $__symal_a="grow" if $___ALIGNMENT=~ /grow/;
-
- my($__symal_d,$__symal_f,$__symal_b);
- ($__symal_d,$__symal_f,$__symal_b)=("no","no","no");
-
- $__symal_d="yes" if $___ALIGNMENT=~ /diag/;
- $__symal_f="yes" if $___ALIGNMENT=~ /final/;
- $__symal_b="yes" if $___ALIGNMENT=~ /final-and/;
-
- safesystem("$GIZA2BAL -d $__ALIGNMENT_INV_CMD -i $__ALIGNMENT_CMD |".
- "$SYMAL -alignment=\"$__symal_a\" -diagonal=\"$__symal_d\" ".
- "-final=\"$__symal_f\" -both=\"$__symal_b\" > ".
- "$___MODEL_DIR/aligned.$___ALIGNMENT")
-    || die "Can't generate symmetrized alignment file\n";
-}
-
-### (4) BUILDING LEXICAL TRANSLATION TABLE
-
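-# For each translation factor pair, the aligned corpus is reduced to that
-# factor and a lexical table is estimated in both directions:
-# lex.$factor.f2n holds "e f p(e|f)" lines and lex.$factor.n2f holds
-# "f e p(f|e)" lines, with NULL alignments counted for unaligned words.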
-sub get_lexical_factored {
- print STDERR "(4) generate lexical translation table $___TRANSLATION_FACTORS @ ".`date`;
- foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
- $factor = $f;
- ($factor_f,$factor_e) = split(/\-/,$factor);
- &reduce_factors($___CORPUS.".".$___F,
- $___MODEL_DIR."/aligned.".$factor_f.".".$___F,
- $factor_f);
- &reduce_factors($___CORPUS.".".$___E,
- $___MODEL_DIR."/aligned.".$factor_e.".".$___E,
- $factor_e);
- &get_lexical();
- }
-}
-
-sub get_lexical {
- print STDERR "(4) [$factor] generate lexical translation table @ ".`date`;
- my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
-
- &open_alignment();
- while(my $e = <E>) {
- if (($alignment_id++ % 1000) == 0) { print STDERR "!"; }
- chomp($e);
- my @ENGLISH = split(/ /,$e);
- my $f = <F>; chomp($f);
- my @FOREIGN = split(/ /,$f);
- my $a = <A>; chomp($a);
-
- my (%FOREIGN_ALIGNED,%ENGLISH_ALIGNED);
- foreach (split(/ /,$a)) {
- my ($fi,$ei) = split(/\-/);
- if ($fi >= scalar(@FOREIGN) || $ei >= scalar(@ENGLISH)) {
- print STDERR "alignment point ($fi,$ei) out of range (0-$#FOREIGN,0-$#ENGLISH) in line $alignment_id, ignoring\n";
- }
- else {
- # local counts
- $FOREIGN_ALIGNED{$fi}++;
- $ENGLISH_ALIGNED{$ei}++;
-
- # global counts
- $WORD_TRANSLATION{$FOREIGN[$fi]}{$ENGLISH[$ei]}++;
- $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
- $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
- }
- }
-
- # unaligned words
- for(my $ei=0;$ei<scalar(@ENGLISH);$ei++) {
- next if defined($ENGLISH_ALIGNED{$ei});
- $WORD_TRANSLATION{"NULL"}{$ENGLISH[$ei]}++;
- $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
- $TOTAL_FOREIGN{"NULL"}++;
- }
- for(my $fi=0;$fi<scalar(@FOREIGN);$fi++) {
- next if defined($FOREIGN_ALIGNED{$fi});
- $WORD_TRANSLATION{$FOREIGN[$fi]}{"NULL"}++;
- $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
- $TOTAL_ENGLISH{"NULL"}++;
- }
- }
- &close_alignment();
- &save_word_translation(\%WORD_TRANSLATION,\%TOTAL_FOREIGN,\%TOTAL_ENGLISH);
-}
-
-sub open_alignment {
- open(E,"$___MODEL_DIR/aligned.$factor_e.$___E")
- or die "Can't read $___MODEL_DIR/aligned.$factor_e.$___E";
- open(F,"$___MODEL_DIR/aligned.$factor_f.$___F")
- or die "Can't read $___MODEL_DIR/aligned.$factor_f.$___F";
- open(A,"$___MODEL_DIR/aligned.$___ALIGNMENT")
- or die "Can't read $___MODEL_DIR/aligned.$___ALIGNMENT";
- $alignment_id=0;
-}
-
-sub close_alignment {
- print STDERR "\n";
- close(A);
- close(F);
- close(E);
-}
-
-sub save_word_translation {
- my ($WORD_TRANSLATION,$TOTAL_FOREIGN,$TOTAL_ENGLISH) = @_;
- safesystem("mkdir -p $___LEXICAL_DIR") or die;
- open(F2E,">$___LEXICAL_DIR/lex.$factor.f2n")
- or die "Can't write $___LEXICAL_DIR/lex.$factor.f2n";
- open(E2F,">$___LEXICAL_DIR/lex.$factor.n2f")
- or die "Can't write $___LEXICAL_DIR/lex.$factor.n2f";
- foreach my $f (keys %{$WORD_TRANSLATION}) {
- foreach my $e (keys %{$$WORD_TRANSLATION{$f}}) {
- printf F2E "%s %s %.7f\n",$e,$f,$$WORD_TRANSLATION{$f}{$e}/$$TOTAL_FOREIGN{$f};
- printf E2F "%s %s %.7f\n",$f,$e,$$WORD_TRANSLATION{$f}{$e}/$$TOTAL_ENGLISH{$e};
- }
- }
- close(E2F);
- close(F2E);
- print STDERR "Saved: $___LEXICAL_DIR/lex.$factor.f2n and $___LEXICAL_DIR/lex.$factor.n2f\n";
-}
-
-### (5) PHRASE EXTRACTION
-
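-# The external phrase-extract tool writes its output in parts, which are
-# concatenated here into $___EXTRACT_FILE.$factor (phrase pairs), .inv
-# (the inverted pairs) and .o (orientation information for the lexicalized
-# reordering step); the .o file is gzipped right away.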
-sub extract_phrase_factored {
- print STDERR "(5) extract phrases @ ".`date`;
- my %generated;
- foreach my $f (split(/\+/,"$___TRANSLATION_FACTORS"
- .($REORDERING_LEXICAL ? "+$___REORDERING_FACTORS" : ""))) {
- # we extract phrases for all translation steps and also for reordering factors (if lexicalized reordering is used)
- next if $generated{$f};
- $generated{$f} = 1;
- $factor = $f;
- ($factor_f,$factor_e) = split(/\-/,$factor);
- &extract_phrase();
- }
-}
-
-sub extract_phrase {
- print STDERR "(5) [$factor] extract phrases @ ".`date`;
- my $cmd = "$PHRASE_EXTRACT $___MODEL_DIR/aligned.$factor_e.$___E $___MODEL_DIR/aligned.$factor_f.$___F $___MODEL_DIR/aligned.$___ALIGNMENT $___EXTRACT_FILE.$factor $___MAX_PHRASE_LENGTH orientation";
- print STDERR "$cmd\n";
- safesystem("$cmd") or die "Phrase extraction failed (missing input files?)";
- safesystem("cat $___EXTRACT_FILE.$factor.o.part* > $___EXTRACT_FILE.$factor.o") or die;
- safesystem("rm -f $___EXTRACT_FILE.$factor.o.gz") or die;
- safesystem("gzip $___EXTRACT_FILE.$factor.o") or die;
- if (! $debug) { safesystem("rm -f $___EXTRACT_FILE.$factor.o.part*") or die;}
- safesystem("cat $___EXTRACT_FILE.$factor.part* > $___EXTRACT_FILE.$factor") or die;
- if (! $debug) { safesystem("rm -f $___EXTRACT_FILE.$factor.part*") or die;}
- safesystem("cat $___EXTRACT_FILE.$factor.inv.part* > $___EXTRACT_FILE.$factor.inv") or die;
- if (! $debug) { safesystem("rm -f $___EXTRACT_FILE.$factor.inv.part*") or die;}
-}
-
-### (6) PHRASE SCORING
-
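-# Scoring works on the two sorted extract files: phrase-score builds one
-# table half per direction (f2n and n2f) against the corresponding lexical
-# table, the n2f half is re-sorted, and the two halves are merged line by
-# line into phrase-table.$factor.  Each merged line carries the scores of
-# both halves plus the constant 2.718 phrase-penalty feature, presumably
-# accounting for the five scores per table declared later in moses.ini.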
-sub score_phrase_factored {
- print STDERR "(6) score phrases @ ".`date`;
- foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
- $factor = $f;
- ($factor_f,$factor_e) = split(/\-/,$factor);
- &score_phrase();
- }
-}
-
-sub score_phrase {
- print STDERR "(6) [$factor] score phrases @ ".`date`;
- if (-e "$___EXTRACT_FILE.$factor.gz") {
- safesystem("gunzip < $___EXTRACT_FILE.$factor.gz > $___EXTRACT_FILE.$factor") or die;
- }
- if (-e "$___EXTRACT_FILE.$factor.inv.gz") {
- safesystem("gunzip < $___EXTRACT_FILE.$factor.inv.gz > $___EXTRACT_FILE.$factor.inv") or die;
- }
- print STDERR "(6.1) [$factor] sorting @ ".`date`;
- # print "LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor > $___EXTRACT_FILE.$factor.sorted\n";
- safesystem("LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor > $___EXTRACT_FILE.$factor.sorted") or die;
- safesystem("rm -f $___EXTRACT_FILE.$factor.gz") or die;
- safesystem("gzip $___EXTRACT_FILE.$factor") or die;
- print STDERR "(6.2) [$factor] sorting inv @ ".`date`;
- # print "LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor.inv > $___EXTRACT_FILE.$factor.inv.sorted\n";
- safesystem("LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor.inv > $___EXTRACT_FILE.$factor.inv.sorted") or die;
- safesystem("rm -f $___EXTRACT_FILE.$factor.inv.gz") or die;
- safesystem("gzip $___EXTRACT_FILE.$factor.inv") or die;
-
- for my $direction ("f2n","n2f") {
- print STDERR "(6.3) [$factor] creating table half $___MODEL_DIR/phrase-table-half.$factor.$direction @ ".`date`;
- my $extract = "$___EXTRACT_FILE.$factor.sorted";
- $extract = "$___EXTRACT_FILE.$factor.inv.sorted" if $direction eq "n2f";
- my $inverse = "";
- $inverse = " inverse" if $direction eq "n2f";
- my $part_count = &split_extract($extract);
- for(my $i=0;$i<$part_count;$i++) {
- my $part = sprintf("%04d",$i);
- print "$PHRASE_SCORE $extract.part$part $___LEXICAL_DIR/lex.$factor.$direction $___MODEL_DIR/phrase-table-half.$factor.$direction.part$part $inverse\n";
- safesystem("$PHRASE_SCORE $extract.part$part $___LEXICAL_DIR/lex.$factor.$direction $___MODEL_DIR/phrase-table-half.$factor.$direction.part$part $inverse")
- or die "Scoring of phrases failed";
- if (! $debug) { safesystem("rm $extract.part$part") or die;}
- }
- safesystem("cat $___MODEL_DIR/phrase-table-half.$factor.$direction.part* >$___MODEL_DIR/phrase-table-half.$factor.$direction") or die;
- }
- print STDERR "(6.4) [$factor] sorting inverse n2f table@ ".`date`;
- print "LC_ALL=C sort -T $___MODEL_DIR $___MODEL_DIR/phrase-table-half.$factor.n2f > $___MODEL_DIR/phrase-table-half.$factor.n2f inverse.sorted\n";
- safesystem("LC_ALL=C sort -T $___MODEL_DIR $___MODEL_DIR/phrase-table-half.$factor.n2f > $___MODEL_DIR/phrase-table-half.$factor.n2f.sorted") or die;
- print STDERR "(6.5) [$factor] consolidating the two halves @ ".`date`;
- open(F2N,"$___MODEL_DIR/phrase-table-half.$factor.f2n")
- or die "Can't read $___MODEL_DIR/phrase-table-half.$factor.f2n";
- open(N2F,"$___MODEL_DIR/phrase-table-half.$factor.n2f.sorted")
- or die "Can't read $___MODEL_DIR/phrase-table-half.$factor.n2f.sorted";
- open(TABLE,">$___MODEL_DIR/phrase-table.$factor")
- or die "Can't write $___MODEL_DIR/phrase-table.$factor";
- my $i=0;
- my $mismatch = 0;
- while(my $f2n = <F2N>) {
- $i++;
- my $n2f = <N2F>;
- my ($english,$foreign,$p) = split(/ \|\|\| /,$n2f); chop($p);
- my ($english2,$foreign2,$p2) = split(/ \|\|\| /,$f2n); chop($p2);
- if ($english ne $english2 || $foreign ne $foreign2) {
- print STDERR "mismatch line $i: ($english ne $english2 || $foreign ne $foreign2)\n";
- $mismatch++;
- last if $mismatch > 10;
- next;
- }
- print TABLE "$english ||| $foreign ||| $p $p2 2.718\n";
- }
- close(N2F);
- close(F2N);
- die "There were mismatches! (printed only first 10)" if $mismatch;
- if (! $debug) { safesystem("rm -f $___MODEL_DIR/phrase-table-half.$factor.*") or die;}
- if (! $debug) { safesystem("rm -f $___MODEL_DIR/extract*sorted*") or die;}
- safesystem("rm -f $___MODEL_DIR/phrase-table.$factor.gz") or die;
- safesystem("gzip $___MODEL_DIR/phrase-table.$factor") or die;
-}
-
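-# split_extract: cut a sorted extract file into chunks of roughly ten
-# million lines for phrase-score, breaking only when the first ||| field
-# changes so that all entries sharing a source phrase stay in the same part.
-# Returns the number of parts written.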
-sub split_extract {
- my ($file) = @_;
- my $i=0;
- my $part = 1;
- my $split_when_possible = 0;
- my ($first,$dummy);
- my $partfname = sprintf("%s.part%04d",$file,0);
- open(PART,">$partfname") or die "Can't write $partfname";
- open(EXTRACT,$file) or die "Can't read $file";
- while(<EXTRACT>) {
- if ($i>0 && $i % 10000000 == 0) {
- $split_when_possible = 1;
- ($first,$dummy) = split(/ \|\|\| /);
- }
- elsif ($split_when_possible) {
- my ($f,$dummy) = split(/ \|\|\| /);
- if ($f ne $first) {
- close(PART) if $i;
- my $partfname = sprintf("%s.part%04d",$file,$part);
- open(PART,">$partfname") or die "Can't write $partfname";
- $split_when_possible = 0;
- $part++;
- }
- }
- print PART $_;
- $i++;
- }
- close(EXTRACT);
- return $part;
-}
-
-### (7) LEARN REORDERING MODEL
-
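-# The extract.o file carries one "f ||| e ||| prev next" line per extracted
-# phrase pair, where prev/next are the orientations (mono, swap, other) with
-# respect to the previous and following phrase.  Counts are smoothed with
-# $___REORDERING_SMOOTH; a trailing "u" (e.g. "0.5u") makes the smoothing
-# proportional to the unconditioned orientation distribution.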
-sub get_reordering_factored {
- print STDERR "(7) learn reordering model @ ".`date`;
- if ($REORDERING_LEXICAL) {
- foreach my $f (split(/\+/,$___REORDERING_FACTORS)) {
- $factor = $f;
- ($factor_f,$factor_e) = split(/\-/,$factor);
- &get_reordering();
- }
- } else {
- print STDERR " ... skipping this step, reordering is not lexicalized ...\n";
- }
-}
-
-sub get_reordering {
- print STDERR "(7) [$factor] learn reordering model @ ".`date`;
- print STDERR "(7.1) [$factor] sorting extract.o @ ".`date`;
- if (-e "$___EXTRACT_FILE.$factor.o.gz") {
- safesystem("gunzip $___EXTRACT_FILE.$factor.o.gz") or die;
- }
- # print "LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor.o > $___EXTRACT_FILE.$factor.o.sorted\n";
- safesystem("LC_ALL=C sort -T $___MODEL_DIR $___EXTRACT_FILE.$factor.o > $___EXTRACT_FILE.$factor.o.sorted") or die;
- safesystem("rm -f $___EXTRACT_FILE.$factor.o.gz") or die;
- safesystem("gzip $___EXTRACT_FILE.$factor.o") or die;
-
- my $smooth = $___REORDERING_SMOOTH;
- my @REORDERING_SMOOTH_PREVIOUS = ($smooth,$smooth,$smooth);
- my @REORDERING_SMOOTH_FOLLOWING = ($smooth,$smooth,$smooth);
-
- my (%SMOOTH_PREVIOUS,%SMOOTH_FOLLOWING);
- if ($smooth =~ /(.+)u$/) {
- $smooth = $1;
- my $smooth_total = 0;
- open(O,"$___EXTRACT_FILE.$factor.o.sorted")
- or die "Can't read $___EXTRACT_FILE.$factor.o.sorted";
- while(<O>) {
- chomp;
- my ($f,$e,$o) = split(/ \|\|\| /);
- my ($o_previous,$o_following) = split(/ /,$o);
- $SMOOTH_PREVIOUS{$o_previous}++;
- $SMOOTH_FOLLOWING{$o_following}++;
- $smooth_total++;
- }
- close(O);
- @REORDERING_SMOOTH_PREVIOUS = ($smooth*($SMOOTH_PREVIOUS{"mono"}+0.1)/$smooth_total,
- $smooth*($SMOOTH_PREVIOUS{"swap"}+0.1)/$smooth_total,
- $smooth*($SMOOTH_PREVIOUS{"other"}+0.1)/$smooth_total);
- @REORDERING_SMOOTH_FOLLOWING = ($smooth*($SMOOTH_FOLLOWING{"mono"}+0.1)/$smooth_total,
- $smooth*($SMOOTH_FOLLOWING{"swap"}+0.1)/$smooth_total,
- $smooth*($SMOOTH_FOLLOWING{"other"}+0.1)/$smooth_total);
- printf "$smooth*($SMOOTH_FOLLOWING{mono}+0.1)/$smooth_total,
- $smooth*($SMOOTH_FOLLOWING{swap}+0.1)/$smooth_total,
- $smooth*($SMOOTH_FOLLOWING{other}+0.1)/$smooth_total\n";
- printf "smoothed following to %f,%f,%f\n",@REORDERING_SMOOTH_FOLLOWING;
- }
-
- ($mono_previous_f,$swap_previous_f,$other_previous_f) = @REORDERING_SMOOTH_PREVIOUS;
- ($mono_previous_fe,$swap_previous_fe,$other_previous_fe) = @REORDERING_SMOOTH_PREVIOUS;
- ($mono_following_f,$swap_following_f,$other_following_f) = @REORDERING_SMOOTH_FOLLOWING;
- ($mono_following_fe,$swap_following_fe,$other_following_fe) = @REORDERING_SMOOTH_FOLLOWING;
-
- print STDERR "(7.2) building tables @ ".`date`;
- open(O,"$___EXTRACT_FILE.$factor.o.sorted")
- or die "Can't read $___EXTRACT_FILE.$factor.o.sorted";
- open(OF, "|gzip >$___MODEL_DIR/orientation-table.$factor.f.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"orientation-f"});
- open(OFE, "|gzip >$___MODEL_DIR/orientation-table.$factor.fe.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"orientation-fe"});
- open(OBF, "|gzip >$___MODEL_DIR/orientation-table.$factor.bi.f.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"orientation-bidirectional-f"});
- open(OBFE,"|gzip >$___MODEL_DIR/orientation-table.$factor.bi.fe.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"orientation-bidirectional-fe"});
- open(MF, "|gzip >$___MODEL_DIR/monotonicity-table.$factor.f.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"monotonicity-f"});
- open(MFE, "|gzip >$___MODEL_DIR/monotonicity-table.$factor.fe.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"monotonicity-fe"});
- open(MBF, "|gzip >$___MODEL_DIR/monotonicity-table.$factor.bi.f.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"monotonicity-bidirectional-f"});
- open(MBFE,"|gzip >$___MODEL_DIR/monotonicity-table.$factor.bi.fe.$___REORDERING_SMOOTH.gz")
- if defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"});
-
- my $first = 1;
- while(<O>) {
- chomp;
- my ($f,$e,$o) = split(/ \|\|\| /);
- my ($o_previous,$o_following) = split(/ /,$o);
-
- # store counts if new f,e
- if ($first) {
- $f_current = $f;
- $e_current = $e;
- $first = 0;
- }
- elsif ($f ne $f_current || $e ne $e_current) {
-
- if (defined($REORDERING_MODEL{"fe"})) {
- # compute probs, store them
- &store_reordering_fe();
-
- # reset counters
- ($mono_previous_fe,$swap_previous_fe,$other_previous_fe) = @REORDERING_SMOOTH_PREVIOUS;
- ($mono_following_fe,$swap_following_fe,$other_following_fe) = @REORDERING_SMOOTH_FOLLOWING;
- }
-
- # store counts if new f
- if ($f ne $f_current && defined($REORDERING_MODEL{"f"})) {
-
- # compute probs, store them
- &store_reordering_f();
-
- # reset counters
- ($mono_previous_f,$swap_previous_f,$other_previous_f) = @REORDERING_SMOOTH_PREVIOUS;
- ($mono_following_f,$swap_following_f,$other_following_f) = @REORDERING_SMOOTH_FOLLOWING;
-
- }
- $f_current = $f;
- $e_current = $e;
- }
- # update counts
- if ($o_previous eq 'mono') { $mono_previous_f++; $mono_previous_fe++; }
- elsif ($o_previous eq 'swap') { $swap_previous_f++; $swap_previous_fe++; }
- elsif ($o_previous eq 'other'){ $other_previous_f++; $other_previous_fe++; }
- else { print STDERR "buggy line (o_previous:$o_previous): $_\n"; }
-
- if ($o_following eq 'mono') { $mono_following_f++; $mono_following_fe++; }
- elsif ($o_following eq 'swap') { $swap_following_f++; $swap_following_fe++; }
- elsif ($o_following eq 'other'){ $other_following_f++; $other_following_fe++; }
- else { print STDERR "buggy line (o_following:$o_following): $_\n"; }
-
- }
- if (defined($REORDERING_MODEL{"f"})) {
- &store_reordering_f();
- }
- if (defined($REORDERING_MODEL{"fe"})) {
- &store_reordering_fe();
- }
- if (! $debug) { safesystem("rm $___EXTRACT_FILE.$factor.o.sorted") or die;}
-}
-
-sub store_reordering_f {
- my $total_previous_f = $mono_previous_f+$swap_previous_f+$other_previous_f;
- my $total_following_f = $mono_following_f+$swap_following_f+$other_following_f;
- if(defined($REORDERING_MODEL{"orientation-f"})) {
- printf OF ("%s ||| %.5f %.5f %.5f\n",
- $f_current,
- $mono_previous_f/$total_previous_f,
- $swap_previous_f/$total_previous_f,
- $other_previous_f/$total_previous_f);
- }
- if(defined($REORDERING_MODEL{"orientation-bidirectional-f"})) {
- printf OBF ("%s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
- $f_current,
- $mono_previous_f/$total_previous_f,
- $swap_previous_f/$total_previous_f,
- $other_previous_f/$total_previous_f,
- $mono_following_f/$total_following_f,
- $swap_following_f/$total_following_f,
- $other_following_f/$total_following_f);
- }
- if(defined($REORDERING_MODEL{"monotonicity-f"})) {
- printf MF ("%s ||| %.5f %.5f\n",
- $f_current,
- $mono_previous_f/$total_previous_f,
- ($swap_previous_f+$other_previous_f)/$total_previous_f);
- }
- if(defined($REORDERING_MODEL{"monotonicity-bidirectional-f"})) {
- printf MBF ("%s ||| %.5f %.5f %.5f %.5f\n",
- $f_current,
- $mono_previous_f/$total_previous_f,
- ($swap_previous_f+$other_previous_f)/$total_previous_f,
- $mono_following_f/$total_following_f,
- ($swap_following_f+$other_following_f)/$total_following_f);
- }
-}
-
-sub store_reordering_fe {
- my $total_previous_fe = $mono_previous_fe+$swap_previous_fe+$other_previous_fe;
- my $total_following_fe = $mono_following_fe+$swap_following_fe+$other_following_fe;
-
- if(defined($REORDERING_MODEL{"orientation-fe"})) {
- printf OFE ("%s ||| %s ||| %.5f %.5f %.5f\n",
- $f_current, $e_current,
- $mono_previous_fe/$total_previous_fe,
- $swap_previous_fe/$total_previous_fe,
- $other_previous_fe/$total_previous_fe);
- }
- if(defined($REORDERING_MODEL{"orientation-bidirectional-fe"})) {
- printf OBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f %.5f %.5f\n",
- $f_current, $e_current,
- $mono_previous_fe/$total_previous_fe,
- $swap_previous_fe/$total_previous_fe,
- $other_previous_fe/$total_previous_fe,
- $mono_following_fe/$total_following_fe,
- $swap_following_fe/$total_following_fe,
- $other_following_fe/$total_following_fe);
- }
- if(defined($REORDERING_MODEL{"monotonicity-fe"})) {
- printf MFE ("%s ||| %s ||| %.5f %.5f\n",
- $f_current, $e_current,
- $mono_previous_fe/$total_previous_fe,
- ($swap_previous_fe+$other_previous_fe)/$total_previous_fe);
- }
- if(defined($REORDERING_MODEL{"monotonicity-bidirectional-fe"})) {
- printf MBFE ("%s ||| %s ||| %.5f %.5f %.5f %.5f\n",
- $f_current, $e_current,
- $mono_previous_fe/$total_previous_fe,
- ($swap_previous_fe+$other_previous_fe)/$total_previous_fe,
- $mono_following_fe/$total_following_fe,
- ($swap_following_fe+$other_following_fe)/$total_following_fe);
- }
-}
-
-### (8) LEARN GENERATION MODEL
-
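-# The generation model is estimated monolingually from the target side of
-# the corpus: for every token, the source factors ($factor_e_source) and
-# target factors ($factor_e) are joined with "|" and counted, and
-# generation.$factor gets one "source target p(target|source)
-# p(source|target)" line per observed pair.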
-my $factor_e_source;
-sub get_generation_factored {
- print STDERR "(8) learn generation model @ ".`date`;
- if (defined $___GENERATION_FACTORS) {
- foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
- $factor = $f;
- ($factor_e_source,$factor_e) = split(/\-/,$factor);
- &get_generation();
- }
- } else {
- print STDERR " no generation model requested, skipping step\n";
- }
-}
-
-sub get_generation {
- print STDERR "(8) [$factor] generate generation table @ ".`date`;
-
- my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
- open(E,$___CORPUS.".".$___E) or die "Can't read ".$___CORPUS.".".$___E;
- $alignment_id=0;
- while(<E>) {
- chomp;
- foreach (split) {
- my @FACTOR = split(/\|/);
-
- my ($source,$target);
- my $first_factor = 1;
- foreach my $factor (split(/,/,$factor_e_source)) {
- $source .= "|" unless $first_factor;
- $first_factor = 0;
- $source .= $FACTOR[$factor];
- }
-
- $first_factor = 1;
- foreach my $factor (split(/,/,$factor_e)) {
- $target .= "|" unless $first_factor;
- $first_factor = 0;
- $target .= $FACTOR[$factor];
- }
- $GENERATION{$source}{$target}++;
- $GENERATION_TOTAL_SOURCE{$source}++;
- $GENERATION_TOTAL_TARGET{$target}++;
- }
- }
- close(E);
-
- open(GEN,">$___MODEL_DIR/generation.$factor") or die "Can't write $___MODEL_DIR/generation.$factor";
- foreach my $source (keys %GENERATION) {
- foreach my $target (keys %{$GENERATION{$source}}) {
- printf GEN ("%s %s %.7f %.7f\n",$source,$target,
- $GENERATION{$source}{$target}/$GENERATION_TOTAL_SOURCE{$source},
- $GENERATION{$source}{$target}/$GENERATION_TOTAL_TARGET{$target});
- }
- }
- close(GEN);
- safesystem("rm -f $___MODEL_DIR/generation.$factor.gz") or die;
- safesystem("gzip $___MODEL_DIR/generation.$factor") or die;
-}
-
-### (9) CREATE CONFIGURATION FILE
-
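-# create_ini writes a first moses.ini: input factors, mapping steps, the
-# ttable/generation/language-model file sections, and default weights
-# (0.2 per translation score, 0.3 and 0 per generation model, -1 word
-# penalty, the language-model and distortion mass split evenly, distortion
-# limit 6).  These defaults are presumably meant as starting values for
-# later weight tuning rather than as final settings.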
-sub create_ini {
- print STDERR "(9) create moses.ini @ ".`date`;
-
- &full_path(\$___MODEL_DIR);
- &full_path(\$___VCB_E);
- &full_path(\$___VCB_F);
- open(INI,">$___MODEL_DIR/moses.ini") or die "Can't write $___MODEL_DIR/moses.ini";
- print INI "#########################
-### MOSES CONFIG FILE ###
-#########################
-\n";
-
- if (defined $___TRANSLATION_FACTORS) {
- print INI "# input factors\n";
- print INI "[input-factors]\n";
- my $INPUT_FACTOR_MAX = 0;
- foreach my $table (split /\+/, $___TRANSLATION_FACTORS) {
- my ($factor_list, $output) = split /-+/, $table;
- foreach (split(/,/,$factor_list)) {
- $INPUT_FACTOR_MAX = $_ if $_>$INPUT_FACTOR_MAX;
- }
- }
- for (my $c = 0; $c <= $INPUT_FACTOR_MAX; $c++) { print INI "$c\n"; }
- } else {
- die "No translation steps defined, cannot prepare [input-factors] section\n";
- }
-
-
- my %stepsused;
- print INI "\n# mapping steps
-[mapping]\n";
- foreach (split(/,/,$___DECODING_STEPS)) {
- s/t/T /g;
- s/g/G /g;
- my ($type, $num) = split /\s+/;
- $stepsused{$type} = $num+1 if !defined $stepsused{$type} || $stepsused{$type} < $num+1;
- print INI $_."\n";
- }
- print INI "\n# translation tables: source-factors, target-factors, number of scores, file
-[ttable-file]\n";
- my $num_of_ttables = 0;
- foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
- $num_of_ttables++;
- my $ff = $f;
- $ff =~ s/\-/ /;
- print INI "$ff 5 $___MODEL_DIR/phrase-table.$f.gz\n";
- }
- if ($num_of_ttables != $stepsused{"T"}) {
- print STDERR "WARNING: Your [mapping-steps] require translation steps up to id $stepsused{T} but you defined translation steps 0..$num_of_ttables\n";
- exit 1 if $num_of_ttables < $stepsused{"T"}; # fatal to define less
- }
-
- my $weights_per_generation_model = 2;
-
- if (defined $___GENERATION_FACTORS) {
- print INI "\n# generation models: source-factors, target-factors, number-of-weights, filename\n";
- print INI "[generation-file]\n";
- my $cnt = 0;
- foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
- $cnt++;
- my $ff = $f;
- $ff =~ s/\-/ /;
- print INI "$ff $weights_per_generation_model $___MODEL_DIR/generation.$f.gz\n";
- }
- if ($cnt != $stepsused{"G"}) {
- print STDERR "WARNING: Your [mapping-steps] require generation steps up to id $stepsused{G} but you defined generation steps 0..$cnt\n";
- exit 1 if $cnt < $stepsused{"G"}; # fatal to define less
- }
- } else {
- print INI "\n# no generation models, no generation-file section\n";
- }
-
-print INI "\n# language models: type(srilm/irstlm), factors, order, file
-[lmodel-file]\n";
- foreach my $lm (@___LM) {
- my ($f, $o, $fn) = @$lm;
- my $type = 0; # default to srilm
- print INI "$type $f $o $fn\n";
- }
-
-print INI "\n\n# limit on how many phrase translations e for each phrase f are loaded
-# 0 = all elements loaded
-[ttable-limit]
-20\n";
- foreach(1..$num_of_ttables) {
- print INI "0\n";
- }
-
- my $weight_d_count = 0;
- if ($___REORDERING ne "distance") {
- my $file = "# distortion (reordering) files\n[distortion-file]\n";
- my $type = "# distortion (reordering) type\n[distortion-type]\n";
- my $factor_i = 0;
- foreach my $factor (split(/\+/,$___REORDERING_FACTORS)) {
- foreach my $r (keys %REORDERING_MODEL) {
- next if $r eq "fe" || $r eq "f";
- next if $r eq "distance" && $factor_i>0;
- $type .= $r."\n";
- if ($r eq "distance") { $weight_d_count++; }
- else {
- $r =~ s/-bidirectional/.bi/;
- $r =~ s/-f/.f/;
- $r =~ s/orientation/orientation-table.$factor/;
- $r =~ s/monotonicity/monotonicity-table.$factor/;
- $file .= "$___MODEL_DIR/$r.$___REORDERING_SMOOTH.gz\n";
-
- my $w;
- if ($r =~ /orient/) { $w = 3; } else { $w = 1; }
- if ($r =~ /bi/) { $w *= 2; }
- $weight_d_count += $w;
- }
- }
- $factor_i++;
- }
- print INI $type."\n".$file."\n";
- }
- else {
- $weight_d_count = 1;
- }
-
- print INI "# distortion (reordering) weight\n[weight-d]\n";
- for(my $i=0;$i<$weight_d_count;$i++) {
- print INI "".(0.6/(scalar keys %REORDERING_MODEL))."\n";
- }
- print INI "\n# language model weights
-[weight-l]\n";
- my $lmweighttotal = 0.5;
- foreach(1..scalar @___LM) {
- printf INI "%.4f\n", $lmweighttotal / scalar @___LM;
- }
-
-print INI "\n\n# translation model weights
-[weight-t]\n";
- foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
- print INI "0.2\n0.2\n0.2\n0.2\n0.2\n";
- }
-
- if (defined $___GENERATION_FACTORS) {
- print INI "\n# generation model weights, for each model $weights_per_generation_model weights\n";
- print INI "[weight-generation]\n";
- foreach my $f (split(/\+/,$___GENERATION_FACTORS)) {
- print INI "0.3\n0\n";
- }
- } else {
- print INI "\n# no generation models, no weight-generation section\n";
- }
-
-print INI "\n# word penalty
-[weight-w]
--1
-
-[distortion-limit]
-6
-";
-
-
- close(INI);
-}
-
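-# full_path: turn a relative path into an absolute one (prefixed with `pwd`)
-# and normalize "./" and "dir/../" components in place.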
-sub full_path {
- my ($PATH) = @_;
- return if $$PATH =~ /^\//;
- $$PATH = `pwd`."/".$$PATH;
- $$PATH =~ s/[\r\n]//g;
- $$PATH =~ s/\/\.\//\//g;
- $$PATH =~ s/\/+/\//g;
- my $sanity = 0;
- while($$PATH =~ /\/\.\.\// && $sanity++<10) {
- $$PATH =~ s/\/+/\//g;
- $$PATH =~ s/\/[^\/]+\/\.\.\//\//g;
- }
- $$PATH =~ s/\/[^\/]+\/\.\.$//;
- $$PATH =~ s/\/+$//;
-}
-
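-# safesystem: run a shell command, report a failed exec or a signal to
-# STDERR, and return true only if the command exited with status 0.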
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}