Welcome to mirror list, hosted at ThFree Co, Russian Federation.

wrap-xml.perl « support « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 587e4c54158f3b8370d5b772819c9aeff4d331ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env perl 

use strict;

# Wrap decoder output into the XML/SGML frame of a source test set.
#
# Usage: wrap-xml.perl LANGUAGE SRC-FRAME [SYSTEM-ID] < translations > wrapped
#
#   LANGUAGE   value written into the trglang attribute of <tstset>
#   SRC-FRAME  source-side frame file containing <srcset>/<doc>/<seg> markup
#   SYSTEM-ID  value written into the sysid attribute of each <doc>
#              (defaults to "Edinburgh")
#
# One line is read from STDIN for every <seg> element found in the frame;
# the text of that segment is replaced by the line. All other frame lines
# are copied to STDOUT, with <srcset> renamed to <tstset>.
my ($language, $src, $system) = @ARGV;
$language = "" unless defined $language;
$src      = "" unless defined $src;
die("wrapping frame not found ($src)") unless -e $src;
$system = "Edinburgh" unless $system;

# Three-argument open with a lexical handle: the file name comes from the
# command line and must never be interpreted as a mode or a pipe.
open(my $src_fh, '<', $src) or die "Cannot open $src: $!";

# Slurp all translations up front; they are consumed one per <seg>.
my @OUT = <STDIN>;
chomp(@OUT);

# True while we are inside a <seg> whose closing tag was not on the line
# that opened it; the remaining source-text lines of that segment are dropped.
my $missing_end_seg = 0;

while (my $frame = <$src_fh>) {
    chomp $frame;
    if ($frame =~ /^<srcset/) {
        $frame =~ s/<srcset/<tstset trglang="$language"/i;
    }
    elsif ($frame =~ /^<\/srcset/) {
        $frame =~ s/<\/srcset/<\/tstset/i;
    }
    elsif ($frame =~ /^<doc/i) {
        # Drop any sysid inherited from the frame before adding ours.
        $frame =~ s/ *sysid="[^\"]+"//;
        $frame =~ s/<doc/<doc sysid="$system"/i;
    }
    elsif ($frame =~ /<seg/) {
        my $line = shift(@OUT);
        # Fewer translations than segments, or an explicit decoder failure
        # marker, both yield an empty segment.
        $line = "" if !defined $line || $line =~ /NO BEST TRANSLATION/;
        if ($frame =~ /<\/seg>/) {
            $frame =~ s/(<seg[^>]+> *).*(<\/seg>)/$1$line$2/i;
            $missing_end_seg = 0;
        }
        else {
            # Segment continues on following lines: close it here and
            # skip the continuation lines below.
            $frame =~ s/(<seg[^>]+> *)[^<]*/$1$line<\/seg>/i;
            $missing_end_seg = 1;
        }
    }
    elsif ($missing_end_seg) {
        if ($frame =~ /<\/doc>/) {
            $missing_end_seg = 0;
        }
        else {
            # The line carrying the original </seg> ends the multi-line
            # segment; it is still skipped (we already emitted </seg>), but
            # structural lines after it (e.g. </p>) must be kept again.
            $missing_end_seg = 0 if $frame =~ /<\/seg>/;
            next;
        }
    }
    print $frame."\n";
}
close($src_fh);