Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2015-04-27 16:35:19 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2015-04-27 16:35:19 +0300
commit a47fc006359b68eea2fcc369fae983338226a925 (patch)
tree a15261111a47367ee5f7546ca4bb5cbfd4660300 /scripts
parent da648fd65b7de9709b5cd3b094138f68a9584de0 (diff)
option to output factors
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/training/wrappers/madamira-wrapper.perl 57
1 files changed, 53 insertions, 4 deletions
diff --git a/scripts/training/wrappers/madamira-wrapper.perl b/scripts/training/wrappers/madamira-wrapper.perl
index 6e7efe245..9866e6421 100755
--- a/scripts/training/wrappers/madamira-wrapper.perl
+++ b/scripts/training/wrappers/madamira-wrapper.perl
@@ -8,21 +8,32 @@ use File::Basename;
use FindBin qw($RealBin);
use Cwd 'abs_path';
+sub GetFactors;
+
+
my $TMPDIR = "tmp";
my $SCHEME = "D2";
my $KEEP_TMP = 0;
my $MADA_DIR;
+my $FACTORS_STR;
+my @FACTORS;
+
GetOptions(
"scheme=s" => \$SCHEME,
"tmpdir=s" => \$TMPDIR,
"keep-tmp" => \$KEEP_TMP,
- "mada-dir=s" => \$MADA_DIR
+ "mada-dir=s" => \$MADA_DIR,
+ "factors=s" => \$FACTORS_STR
) or die("ERROR: unknown options");
$TMPDIR = abs_path($TMPDIR);
print STDERR "TMPDIR=$TMPDIR \n";
+if (defined($FACTORS_STR)) {
+ @FACTORS = split(",", $FACTORS_STR);
+}
+
#binmode(STDIN, ":utf8");
#binmode(STDOUT, ":utf8");
@@ -75,13 +86,21 @@ while(my $line = <MADA_OUT>) {
print "\n";
}
elsif (index($line, ";;WORD") == 0) {
- # word
+ # word
my $word = substr($line, 7, length($line) - 8);
- #print STDERR "FOund $word\n";
+ #print STDERR "FOund $word\n";
+
+ for (my $i = 0; $i < 4; ++$i) {
+ $line = <MADA_OUT>;
+ }
+
+ my $factors = GetFactors($line, \@FACTORS);
+ $word .= $factors;
+
print "$word ";
}
else {
- #print STDERR "NADA\n";
+ #print STDERR "NADA\n";
}
}
close (MADA_OUT);
@@ -91,3 +110,33 @@ if ($KEEP_TMP == 0) {
# `rm -rf $TMPDIR`;
}
+
+###########################
+# Build the factor suffix that is appended to one output word.
+#
+# Args:
+#   $line       - whitespace-separated analysis line. The first token is
+#                 skipped (presumably a score/marker, not a feature -- TODO
+#                 confirm against the analyser output); every other token is
+#                 read as "key:value".
+#   $factorsRef - reference to the array of feature names to emit, in order.
+#
+# Returns a string holding "|value" once per requested factor, or "" when no
+# factors are requested. A factor that is absent from the line (or whose
+# token has no ':') yields an empty value, i.e. a bare "|".
+sub GetFactors
+{
+    my ($line, $factorsRef) = @_;
+
+    # The caller reads ahead in the input and may pass undef at end of file;
+    # treat that as a line with no features instead of splitting undef.
+    $line = "" unless defined($line);
+
+    # all factors
+    my %allFactors;
+    my @toks = split(" ", $line);
+    shift(@toks);    # drop the leading non-feature token
+    foreach my $tok (@toks) {
+        # LIMIT 2: keep any further ':' inside the value. Without a limit the
+        # two-variable assignment silently drops everything after a second ':'.
+        my ($key, $value) = split(":", $tok, 2);
+        $allFactors{$key} = $value;
+    }
+
+    my $ret = "";
+    foreach my $factorType (@{$factorsRef}) {
+        my $value = $allFactors{$factorType};
+
+        # Emit an explicit empty factor rather than interpolating undef.
+        $ret .= "|" . (defined($value) ? $value : "");
+    }
+
+    return $ret;
+}
+