Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Barry Haddow <barry.haddow@gmail.com> 2013-04-12 19:07:26 +0400
committer Barry Haddow <barry.haddow@gmail.com> 2013-04-12 19:07:26 +0400
commit 9d42c7f6f74bbb0079768a762fc4546d20d6b634 (patch)
tree ab1a2a2884a3b3b809a969ea0eb36fb98416347e /scripts/training
parent c5965b8587b37986ebab786905a8ef9f218403de (diff)
parent 517d6c7bb834e40bcf25e8cbc79985180cb7f29f (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
Diffstat (limited to 'scripts/training')
-rwxr-xr-x scripts/training/clean-corpus-n.perl 5
-rwxr-xr-x scripts/training/filter-rule-table.py 3
-rwxr-xr-x scripts/training/mert-moses.pl 4
-rwxr-xr-x scripts/training/train-model.perl 3
4 files changed, 9 insertions, 6 deletions
diff --git a/scripts/training/clean-corpus-n.perl b/scripts/training/clean-corpus-n.perl
index bea32052a..2865fe391 100755
--- a/scripts/training/clean-corpus-n.perl
+++ b/scripts/training/clean-corpus-n.perl
@@ -47,7 +47,7 @@ my $l1input = "$corpus.$l1";
if (-e $l1input) {
$opn = $l1input;
} elsif (-e $l1input.".gz") {
- $opn = "zcat $l1input.gz |";
+ $opn = "gunzip -c $l1input.gz |";
} else {
die "Error: $l1input does not exist";
}
@@ -57,7 +57,7 @@ my $l2input = "$corpus.$l2";
if (-e $l2input) {
$opn = $l2input;
} elsif (-e $l2input.".gz") {
- $opn = "zcat $l2input.gz |";
+ $opn = "gunzip -c $l2input.gz |";
} else {
die "Error: $l2input does not exist";
}
@@ -160,3 +160,4 @@ sub word_count {
my @w = split(/ /,$line);
return scalar @w;
}
+
diff --git a/scripts/training/filter-rule-table.py b/scripts/training/filter-rule-table.py
index 8bef034de..86c8b300e 100755
--- a/scripts/training/filter-rule-table.py
+++ b/scripts/training/filter-rule-table.py
@@ -40,7 +40,8 @@ def printUsage():
def main():
parser = optparse.OptionParser()
parser.add_option("-c", "--min-non-initial-rule-count",
- action="store", dest="minCount", type="int", default="1",
+ action="store", dest="minCount",
+ type="float", default="0.0",
help="prune non-initial rules where count is below N",
metavar="N")
(options, args) = parser.parse_args()
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 688e8ce55..9f5f25f15 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl -w
# $Id$
# Usage:
# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
@@ -371,7 +371,7 @@ my $pro_optimizer = File::Spec->catfile($mertdir, "megam_i686.opt"); # or set t
if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optimizer) {
print "Could not find $pro_optimizer, installing it in $mertdir\n";
- my $megam_url = "http://www.umiacs.umd.edu/~hal/megam/";
+ my $megam_url = "http://hal3.name/megam";
if (&is_mac_osx()) {
die "Error: Sorry for Mac OS X users! Please get the source code of megam and compile by hand. Please see $megam_url for details.";
}
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 5b0553581..e4292007e 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -38,8 +38,9 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
@_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
- $_SPARSE_TRANSLATION_TABLE, @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_CORPUS, $_BASELINE_ALIGNMENT,
+ $_SPARSE_TRANSLATION_TABLE, @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
$_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $IGNORE);
+my $_BASELINE_CORPUS = "";
my $_CORES = 1;
my $debug = 0; # debug this script, do not delete any files in debug mode