diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2015-04-27 16:35:19 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2015-04-27 16:35:19 +0300 |
commit | a47fc006359b68eea2fcc369fae983338226a925 (patch) | |
tree | a15261111a47367ee5f7546ca4bb5cbfd4660300 /scripts | |
parent | da648fd65b7de9709b5cd3b094138f68a9584de0 (diff) |
option to output factors
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/training/wrappers/madamira-wrapper.perl | 57 |
1 files changed, 53 insertions, 4 deletions
```diff
diff --git a/scripts/training/wrappers/madamira-wrapper.perl b/scripts/training/wrappers/madamira-wrapper.perl
index 6e7efe245..9866e6421 100755
--- a/scripts/training/wrappers/madamira-wrapper.perl
+++ b/scripts/training/wrappers/madamira-wrapper.perl
@@ -8,21 +8,32 @@ use File::Basename;
 use FindBin qw($RealBin);
 use Cwd 'abs_path';
 
+sub GetFactors;
+
+
 my $TMPDIR = "tmp";
 my $SCHEME = "D2";
 my $KEEP_TMP = 0;
 my $MADA_DIR;
 
+my $FACTORS_STR;
+my @FACTORS;
+
 GetOptions(
     "scheme=s" => \$SCHEME,
     "tmpdir=s" => \$TMPDIR,
     "keep-tmp" => \$KEEP_TMP,
-    "mada-dir=s" => \$MADA_DIR
+    "mada-dir=s" => \$MADA_DIR,
+    "factors=s" => \$FACTORS_STR
 ) or die("ERROR: unknown options");
 
 $TMPDIR = abs_path($TMPDIR);
 print STDERR "TMPDIR=$TMPDIR \n";
 
+if (defined($FACTORS_STR)) {
+    @FACTORS = split(",", $FACTORS_STR);
+}
+
 #binmode(STDIN, ":utf8");
 #binmode(STDOUT, ":utf8");
 
@@ -75,13 +86,21 @@ while(my $line = <MADA_OUT>) {
         print "\n";
     }
     elsif (index($line, ";;WORD") == 0) {
-        # word
+        # word
         my $word = substr($line, 7, length($line) - 8);
-        #print STDERR "FOund $word\n";
+        #print STDERR "FOund $word\n";
+
+        for (my $i = 0; $i < 4; ++$i) {
+            $line = <MADA_OUT>;
+        }
+
+        my $factors = GetFactors($line, \@FACTORS);
+        $word .= $factors;
+
         print "$word ";
     }
     else {
-        #print STDERR "NADA\n";
+        #print STDERR "NADA\n";
     }
 }
 close (MADA_OUT);
@@ -91,3 +110,33 @@ if ($KEEP_TMP == 0) {
     # `rm -rf $TMPDIR`;
 }
 
+
+###########################
+sub GetFactors
+{
+    my $line = shift;
+    my $factorsRef = shift;
+    my @factors = @{$factorsRef};
+
+    # all factors
+    my %allFactors;
+    my @toks = split(" ", $line);
+    for (my $i = 1; $i < scalar(@toks); ++$i) {
+        #print " tok=" .$toks[$i];
+
+        my ($key, $value) = split(":", $toks[$i]);
+        $allFactors{$key} = $value;
+    }
+
+    my $ret = "";
+    my $factorType;
+    foreach $factorType(@factors) {
+        #print "factorType=$factorType ";
+        my $value = $allFactors{$factorType};
+
+        $ret .= "|$value";
+    }
+
+    return $ret;
+}
+
```