Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorphkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>2006-09-01 22:02:56 +0400
committerphkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>2006-09-01 22:02:56 +0400
commit572c577ef7f5bbeaa6f40245c73b4e1d9efadb60 (patch)
tree6efa69ee37b2580e06297494309057305131ae4f /scripts
parent041e6ed3c5b11beb34f91a564802c8f9f0a0a5a3 (diff)
initial release
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@806 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/training/exodus.perl153
1 files changed, 153 insertions, 0 deletions
diff --git a/scripts/training/exodus.perl b/scripts/training/exodus.perl
new file mode 100755
index 000000000..2be891273
--- /dev/null
+++ b/scripts/training/exodus.perl
@@ -0,0 +1,153 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my @LINE = <STDIN>;
+
+print "# Moses configuration file\n";
+print "# automatic exodus from pharaoh.ini ".`date`;
+print "\n";
+
+# replicate the old header
+my $header = 0;
+while($LINE[$header] =~ /^$/ || $LINE[$header] =~ /^\#/) {
+ $header++;
+}
+for(my $i=0;$i<$header;$i++) { print $LINE[$i]; }
+
+# read Pharaoh parameters that will be absorbed
+my(@LMODEL_TYPE, @DISTORTION_TYPE);
+for(my $i=$header;$i<=$#LINE;$i++) {
+ # language model specification
+ if ($LINE[$i] =~ /^\[lmodel-type\]/) {
+ $i = &read(\@LMODEL_TYPE,$i);
+ foreach (@LMODEL_TYPE) {
+ $_ = "3gram" if $_ eq "normal";
+ $_ = "3gram-".$_ unless /\d/;
+ $_ =~ s/gram//;
+ }
+ }
+ # distortion model specification
+ elsif ($LINE[$i] =~ /^\[distortion-type\]/) {
+ my @DT;
+ $i = &read(\@DT,$i);
+ foreach (@DT) {
+ next if /distance/;
+ s/orientation/msd/;
+ s/monotonicity/monotone/;
+ push @DISTORTION_TYPE,$_;
+ }
+ }
+}
+# adapt/replicate Pharaoh parameters
+for(my $i=$header;$i<=$#LINE;$i++) {
+ # parameters to be dropped
+ if ($LINE[$i] =~ /^\[lmodel-type\]/) {
+ my @DUMMY;
+ $i = &read(\@DUMMY,$i);
+ }
+ #
+ elsif ($LINE[$i] =~ /^\[distortion-type\]/) {
+ my @DISTORTION_TYPE;
+ $i = &read(\@DISTORTION_TYPE,$i);
+ foreach (@DISTORTION_TYPE) {
+ next if /distance/;
+ s/orientation/msd/;
+ s/monotonicity/monotone/;
+ s/unidirectional/backward/;
+ }
+ }
+ # parameters to be changed
+ elsif ($LINE[$i] =~ /^\[lmodel-file\]/) {
+ print $LINE[$i];
+ # add language model type, factors
+ my @LMODEL_FILE;
+ $i = &read(\@LMODEL_FILE,$i);
+ for(my $j=0;$j<=$#LMODEL_FILE;$j++) {
+ print "0 0 ";
+ if (defined($LMODEL_TYPE[$j])) {
+ print $LMODEL_TYPE[$j];
+ }
+ else {
+ print "3";
+ }
+ print " $LMODEL_FILE[$j]\n";
+ }
+ print "\n";
+ }
+ elsif ($LINE[$i] =~ /^\[ttable-file\]/) {
+ print $LINE[$i];
+ # add factors
+ my @TTABLE_FILE;
+ $i = &read(\@TTABLE_FILE,$i);
+ my $first_line;
+ if (-e $TTABLE_FILE[0]) {
+ if ($TTABLE_FILE[0] =~ /\.gz$/) {
+ $first_line = `zcat $TTABLE_FILE[0] | head -1`;
+ }
+ else {
+ $first_line = `head -1 $TTABLE_FILE[0]`;
+ }
+ }
+ elsif (-e $TTABLE_FILE[0].".gz") {
+ $first_line = `zcat $TTABLE_FILE[0] | head -1`;
+ }
+ else {
+ print STDERR "ERROR: Thou shalt have a translation table in '$TTABLE_FILE[0]'\n";
+ exit;
+ }
+ chop($first_line);
+ my ($f,$e,$p) = split(/ \|\|\| /,$first_line);
+ $p =~ s/ +/ /g; $p =~ s/^ //; $p =~ s/ $//;
+ my @P = split(/ /,$p);
+ my $p_count = scalar @P;
+ print "0 0 $p_count $TTABLE_FILE[0]\n\n";
+ }
+ elsif ($LINE[$i] =~ /^\[distortion-file\]/) {
+ print $LINE[$i];
+ my @DISTORTION_FILE;
+ $i = &read(\@DISTORTION_FILE,$i);
+ for(my $j=0;$j<=$#DISTORTION_FILE;$j++) {
+ if (!defined($DISTORTION_TYPE[$j])) {
+ die("ERROR: no distortion type specified for distortion file $DISTORTION_FILE[$j]\n");
+ }
+ my $weight_count = 2;
+ $weight_count++ if $DISTORTION_TYPE[$j] =~ /msd/;
+ $weight_count*=2 if $DISTORTION_TYPE[$j] =~ /fe/;
+ print "0-0 $DISTORTION_TYPE[$j] $weight_count $DISTORTION_FILE[$j]\n";
+ }
+ print "\n";
+ }
+ elsif ($LINE[$i] =~ /\# distortion \(reordering\) type/) {}
+ else {
+ # keep unchanged
+ print $LINE[$i];
+ }
+}
+
+# add Moses-specific configuration
+
+print "\n[input-factors]\n0\n\n";
+print "[mapping]\nT 0\n\n";
+
+# sub: read values for one parameter
+sub read {
+ my ($VALUE,$i) = @_;
+ $i++;
+ while($i<=$#LINE && $LINE[$i] !~ /^\[/) {
+ if ($LINE[$i] !~ /^\s*$/ && # ignore comments and empty lines
+ $LINE[$i] !~ /^\#/) {
+ # store value
+ my $line = $LINE[$i];
+ chop($line);
+ push @{$VALUE},$line;
+ }
+ $i++;
+ }
+ $i--;
+
+ # leave comments above next parameter
+ while($LINE[$i] =~ /^\#/) { $i--; }
+
+ return $i;
+}