Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Hieu Hoang <fishandfrolick@gmail.com> 2013-05-03 17:36:39 +0400
committer Hieu Hoang <fishandfrolick@gmail.com> 2013-05-03 17:36:39 +0400
commit e2f2aff94a526ddd110da81494c66de828d3e7ee (patch)
tree e2dc02ed2c97514af3bfce97a0a6bd6deab32751 /scripts/training
parent b9373c7edf4355bae0d08a45e0ac75204c58c727 (diff)
parent 86cbe7b93b2807e1b9d93a5ce3508e4f6b03d18f (diff)
merged. Mostly by discarding new changes
Diffstat (limited to 'scripts/training')
-rwxr-xr-x scripts/training/binarize-model.perl 62
-rwxr-xr-x scripts/training/train-model.perl 11
2 files changed, 72 insertions, 1 deletions
diff --git a/scripts/training/binarize-model.perl b/scripts/training/binarize-model.perl
new file mode 100755
index 000000000..15ad23ac4
--- /dev/null
+++ b/scripts/training/binarize-model.perl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+
+#
+# Binarize a Moses model
+#
+# Usage:
+#   binarize-model.perl [-Hierarchical] [-Binarizer binarizer] input-config output-config
+#
+# Invokes filter-model-given-input.pl (from this script's directory) with
+# -nofilter and -Binarizer, passing "<output-config>.tables" as its first
+# argument, then makes <output-config> a symlink to the moses.ini inside
+# that directory.
+#
+
+use strict;
+
+use File::Basename qw(dirname);
+use Getopt::Long "GetOptions";
+use FindBin qw($RealBin);
+
+# Force the C locale for this script and for the commands it launches.
+$ENV{"LC_ALL"} = "C";
+
+# Scripts root: the directory holding this script, minus a trailing
+# "/training" path component.
+my $SCRIPTS_ROOTDIR = $RealBin;
+if ($SCRIPTS_ROOTDIR eq '') {
+  # Fallback when FindBin yields nothing; dirname() is File::Basename's.
+  $SCRIPTS_ROOTDIR = dirname(__FILE__);
+}
+$SCRIPTS_ROOTDIR =~ s/\/training$//;
+
+my ($binarizer, $input_config, $output_config);
+my $opt_hierarchical = 0;
+# Default binarizer; can be overridden with -Binarizer.
+$binarizer = "$SCRIPTS_ROOTDIR/../bin/processPhraseTable";
+GetOptions(
+  "Hierarchical" => \$opt_hierarchical,
+  "Binarizer=s" => \$binarizer
+) or exit(1);
+
+# The two remaining positional arguments are the input and output configs.
+$input_config = shift;
+$output_config = shift;
+
+if (!defined $input_config || !defined $output_config) {
+  print STDERR "usage: binarize-model.perl input-config output-config [-Hierarchical] [-Binarizer binarizer]\n";
+  exit 1;
+}
+
+my $targetdir = "$output_config.tables";
+
+# Build the command as a list so that no shell re-parses it: paths and the
+# -Binarizer value reach the filter script as single arguments even when
+# they contain whitespace or shell metacharacters.
+my @filter_cmd = ("$RealBin/filter-model-given-input.pl", $targetdir, $input_config, "/dev/null");
+push @filter_cmd, "-Hierarchical" if $opt_hierarchical;
+push @filter_cmd, "-nofilter", "-Binarizer", $binarizer;
+safesystem(@filter_cmd) || die "binarising failed";
+
+# Replace any previous output config with a symlink to the new ini file.
+# The result of "rm -f" is deliberately ignored; only the link step decides
+# success.
+safesystem("rm", "-f", $output_config);
+safesystem("ln", "-s", "$targetdir/moses.ini", $output_config) || die "failed to link new ini file";
+
+#FIXME: Why isn't this in a module?
+#
+# Run an external command with system() and report how it ended.
+#
+# Arguments: the command, handed unchanged to system() -- either a single
+#            string or a program name followed by its arguments.
+# Returns:   true if the command exited with status 0, false otherwise
+#            (the non-zero exit code is logged to STDERR).
+# Exits the whole script with status 1 if the command could not be started
+# or was terminated by a signal.
+sub safesystem {
+  print STDERR "Executing: @_\n";
+  system(@_);
+  if ($? == -1) {
+    # system() could not launch the command at all; $! holds the reason.
+    print STDERR "Failed to execute: @_\n $!\n";
+    exit(1);
+  }
+  elsif ($? & 127) {
+    # Low 7 bits of $? are the terminating signal; bit 128 flags a core dump.
+    # The command is passed as a %s argument rather than interpolated into
+    # the format string, so a literal '%' in it cannot corrupt the report.
+    printf STDERR "Execution of: %s\n died with signal %d, %s coredump\n",
+      "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
+    exit(1);
+  }
+  else {
+    # Normal termination: the exit code lives in the high byte of $?.
+    my $exitcode = $? >> 8;
+    print STDERR "Exit code: $exitcode\n" if $exitcode;
+    return ! $exitcode;
+  }
+}
+
+
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 8beb61c30..653a325d0 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -39,7 +39,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
@_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
$_SPARSE_TRANSLATION_TABLE, @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
- $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $IGNORE);
+ $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $_NUM_LATTICE_FEATURES, $IGNORE);
my $_BASELINE_CORPUS = "";
my $_CORES = 1;
@@ -136,6 +136,7 @@ $_HELP = 1
'cores=i' => \$_CORES,
'instance-weights-file=s' => \$_INSTANCE_WEIGHTS_FILE,
'lmodel-oov-feature' => \$_LMODEL_OOV_FEATURE,
+ 'num-lattice-features=i' => \$_NUM_LATTICE_FEATURES,
);
if ($_HELP) {
@@ -2019,6 +2020,14 @@ sub create_ini {
print INI "\n# no generation models, no weight-generation section\n";
}
+ # Optional [weight-i] section: one default weight of 0.1 for each lattice /
+ # confusion-net feature requested via --num-lattice-features.
+ if ($_NUM_LATTICE_FEATURES) {
+   print INI "\n\n#lattice or confusion net weights\n[weight-i]\n";
+   for (1..$_NUM_LATTICE_FEATURES) {
+     print INI "0.1\n";
+   }
+   # The separating blank line belongs in the ini file with the rest of the
+   # section, not on STDOUT.
+   print INI "\n";
+ }
+
print INI "\n# word penalty\n[weight-w]\n-1\n\n";
if ($_HIERARCHICAL) {