Welcome to mirror list, hosted at ThFree Co, Russian Federation.

convert-pt.perl « other « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 60c8cbdb2bcbc9ae3d135675a1892c237303e43d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# $Id$
# convert a phrase-table with alignment in Moses' dead-end format
#    a . ||| A . ||| (0) (0,1) ||| (0,1) (1) ||| 1 0.0626124 1 0.032119 2.718
# to
#    a . ||| A . ||| 1 0.0626124 1 0.032119 2.718 ||| 0-0 1-0 1-1


use strict;
use warnings;
use Getopt::Long;
use IO::File;
use File::Basename;

sub ConvertAlignment($);

# Treat all three standard streams as UTF-8 text.
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");


# Read old-format phrase-table entries from STDIN, one per line, and write the
# converted entries to STDOUT.  Malformed lines are reported on STDERR with
# their line number and skipped.
my $lineNum = 0;
while (my $line = <STDIN>) {
  chomp($line);
  ++$lineNum;

  # Fields are delimited by the three-character sequence "|||".  Splitting on
  # the whole delimiter (rather than on a single "|") keeps a phrase that
  # itself contains a pipe character from shifting the field positions.
  # Whitespace around each field is deliberately left in place so that it is
  # reproduced unchanged in the output.
  my @toks = split(/\|\|\|/, $line);

  # Expected field order: source, target, source-side alignment,
  # target-side alignment, scores.
  if (scalar(@toks) < 5) {
    warn "line $lineNum: expected 5 '|||'-separated fields, found "
       . scalar(@toks) . "; skipping\n";
    next;
  }

  my $source = $toks[0];
  my $target = $toks[1];
  my $alignS = $toks[2];
  my $scores = $toks[4];

  # Only the source-side alignment field is converted; the target-side
  # field ($toks[3]) is not used.
  my $align = ConvertAlignment($alignS);

  print "$source|||$target|||$scores ||| $align\n";

}

# Convert one old-style alignment field into a list of "i-j" link pairs.
#
# Argument: a string of whitespace-separated parenthesised groups.  The
#   group at index i holds a comma-separated list of positions j, e.g.
#   "(0) (0,1)".  An empty group "()" contributes no links.
#
# Returns: the links as "i-j" pairs in input order, each followed by one
#   space (so a non-empty result ends with a trailing space), e.g.
#   "0-0 1-0 1-1 ".  Returns the empty string when there are no links.
#
# The prototype is ($) to agree with the forward declaration at the top of
# the file; the function takes exactly one argument.
sub ConvertAlignment($)
{
  my $alignS = shift;
  my $ret = "";

  # split(' ', ...) ignores leading whitespace and treats any run of
  # whitespace as a single separator, so repeated spaces or tabs cannot
  # produce empty groups that would shift the position index.
  my @toks = split(' ', $alignS);

  for my $posS (0 .. $#toks) {
    # Strip the surrounding parentheses: "(0,1)" -> "0,1".
    my $tok = substr($toks[$posS], 1, length($toks[$posS]) - 2);

    for my $posT (split(/,/, $tok)) {
      $ret .= "$posS-$posT ";
    }
  }

  return $ret;
}