Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2014-07-23 18:44:55 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2014-07-23 18:44:55 +0400
commit 573076976f7d24f67a60e3a8e4190110517f1f91 (patch)
tree 02ca9fc99941310043605991c0ceb783867d3670 /scripts/Transliteration
parent 2d11fe39161e8c5ce0fa7bb66d3aa82ef6919b28 (diff)
added transliteration into ems example config, minor fixes
Diffstat (limited to 'scripts/Transliteration')
-rwxr-xr-x scripts/Transliteration/post-decoding-transliteration.pl 9
-rwxr-xr-x scripts/Transliteration/train-transliteration-module.pl 12
2 files changed, 14 insertions, 7 deletions
diff --git a/scripts/Transliteration/post-decoding-transliteration.pl b/scripts/Transliteration/post-decoding-transliteration.pl
index 8aca3460d..69fd8bf46 100755
--- a/scripts/Transliteration/post-decoding-transliteration.pl
+++ b/scripts/Transliteration/post-decoding-transliteration.pl
@@ -21,12 +21,12 @@ die("ERROR: wrong syntax when invoking postDecodingTransliteration.perl")
'transliteration-model-dir=s' => \$TRANSLIT_MODEL,
'input-extension=s' => \$INPUT_EXTENSION,
'output-extension=s' => \$OUTPUT_EXTENSION,
- 'decoder=s' => \$DECODER,
+ 'decoder=s' => \$DECODER,
'oov-file=s' => \$OOV_FILE,
'input-file=s' => \$INPUT_FILE,
'output-file=s' => \$OUTPUT_FILE,
'verbose' => \$VERBOSE,
- 'language-model=s' => \$LM_FILE);
+ 'language-model=s' => \$LM_FILE);
# check if the files are in place
die("ERROR: you need to define --moses-src-dir --external-bin-dir, --transliteration-model-dir, --oov-file, --output-file --input-extension, --output-extension, and --language-model")
@@ -38,6 +38,11 @@ die("ERROR: you need to define --moses-src-dir --external-bin-dir, --translitera
defined($INPUT_FILE)&&
defined($EXTERNAL_BIN_DIR)&&
defined($LM_FILE));
+if (! -e $LM_FILE) {
+ my $LM_FILE_WORD = `ls $LM_FILE*word*`;
+ chop($LM_FILE_WORD);
+ $LM_FILE = $LM_FILE_WORD if $LM_FILE_WORD ne "";
+}
die("ERROR: could not find Language Model '$LM_FILE'")
unless -e $LM_FILE;
die("ERROR: could not find Transliteration Model '$TRANSLIT_MODEL'")
diff --git a/scripts/Transliteration/train-transliteration-module.pl b/scripts/Transliteration/train-transliteration-module.pl
index 355232222..7739e2a2b 100755
--- a/scripts/Transliteration/train-transliteration-module.pl
+++ b/scripts/Transliteration/train-transliteration-module.pl
@@ -13,7 +13,7 @@ print STDERR "Training Transliteration Module - Start\n".`date`;
my $ORDER = 5;
my $OUT_DIR = "/tmp/Transliteration-Model.$$";
my $___FACTOR_DELIMITER = "|";
-my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$EXTERNAL_BIN_DIR,$INPUT_EXTENSION, $OUTPUT_EXTENSION, $SOURCE_SYNTAX, $TARGET_SYNTAX);
+my ($MOSES_SRC_DIR,$CORPUS_F,$CORPUS_E,$ALIGNMENT,$SRILM_DIR,$FACTOR,$EXTERNAL_BIN_DIR,$INPUT_EXTENSION, $OUTPUT_EXTENSION, $SOURCE_SYNTAX, $TARGET_SYNTAX,$DECODER);
# utilities
my $ZCAT = "gzip -cd";
@@ -31,8 +31,9 @@ die("ERROR: wrong syntax when invoking train-transliteration-module.perl")
'factor=s' => \$FACTOR,
'srilm-dir=s' => \$SRILM_DIR,
'out-dir=s' => \$OUT_DIR,
- 'source-syntax' => \$SOURCE_SYNTAX,
- 'target-syntax' => \$TARGET_SYNTAX);
+ 'decoder=s' => \$DECODER,
+ 'source-syntax' => \$SOURCE_SYNTAX,
+ 'target-syntax' => \$TARGET_SYNTAX);
# check if the files are in place
die("ERROR: you need to define --corpus-e, --corpus-f, --alignment, --srilm-dir, --moses-src-dir --external-bin-dir, --input-extension and --output-extension")
@@ -48,8 +49,9 @@ die("ERROR: could not find input corpus file '$CORPUS_F'")
unless -e $CORPUS_F;
die("ERROR: could not find output corpus file '$CORPUS_E'")
unless -e $CORPUS_E;
-die("ERROR: could not find algnment file '$ALIGNMENT'")
+die("ERROR: could not find alignment file '$ALIGNMENT'")
unless -e $ALIGNMENT;
+$DECODER = "$MOSES_SRC_DIR/bin/moses" unless defined($DECODER);
`mkdir $OUT_DIR`;
@@ -184,7 +186,7 @@ sub train_transliteration_module{
`$MOSES_SRC_DIR/scripts/ems/support/substitute-filtered-tables.perl $OUT_DIR/tuning/filtered/moses.ini < $OUT_DIR/model/moses.ini > $OUT_DIR/tuning/moses.filtered.ini`;
- `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $MOSES_SRC_DIR/bin/moses $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
+ `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
`cp $OUT_DIR/tuning/tmp/moses.ini $OUT_DIR/tuning/moses.ini`;