Welcome to mirror list, hosted at ThFree Co, Russian Federation.

absolutize_moses_model.pl « training « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5c9c0970a1773f083dc2a0481e19b154a2f9870a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env perl 

# $Id$
# given a moses.ini file, prints a copy to stdout but replaces all relative
# paths with absolute paths.
#
# Ondrej Bojar.

use strict;
use warnings;

# The only argument is the moses.ini to rewrite; "-" reads it from stdin.
my $ini = shift;
die "usage: absolutize_moses_model.pl path-to-moses.ini > moses.abs.ini"
  if !defined $ini;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

# Name of the most recent [section] header. It starts out empty (not undef) so
# that a digit-initial line preceding every header is passed through without
# "uninitialized value" warnings from the comparisons below.
my $section = "";

my $inih = my_open($ini);
while (<$inih>) {
  if (/^\[([^\]]*)\]\s*$/) {
    $section = $1;
  }
  # Only lines starting with a digit are rewritten, and only inside the
  # sections handled below; every other line is copied verbatim.
  if (/^[0-9]/) {
    if ($section eq "ttable-file") {
      chomp;
      # Four leading fields, then the file specification. The split limit keeps
      # the specification intact even when it contains spaces itself (type 8).
      my ($type, $f2, $f3, $f4, $fn) = split(/ /, $_, 5);

      if ($type eq '8') {
        # suffix arrays model: <src-corpus> <tgt-corpus> <alignment>.
        my ($src, $tgt, $align) = split(/ /, $fn);
        my @abs;
        for my $part ($src, $tgt, $align) {
          # accept binarized ttables
          push @abs, absolutize_existing($part, $ini, "", ".binphr.idx");
        }
        $_ = "$type $f2 $f3 $f4 $abs[0] $abs[1] $abs[2]\n";
      }
      elsif ($type eq '12') {
        # accept compact binarized ttables (only <path>.minphr is checked)
        my $abs = absolutize_existing($fn, $ini, ".minphr");
        $_ = "$type $f2 $f3 $f4 $abs\n";
      }
      else {
        # accept binarized ttables
        my $abs = absolutize_existing($fn, $ini, "", ".binphr.idx");
        $_ = "$type $f2 $f3 $f4 $abs\n";
      }
    }
    elsif ($section eq "generation-file" || $section eq "lmodel-file") {
      chomp;
      my ($f1, $f2, $f3, $fn) = split / /;
      my $abs = absolutize_existing($fn, $ini, "");
      $_ = "$f1 $f2 $f3 $abs\n";
    }
    elsif ($section eq "distortion-file") {
      chomp;
      my ($f1, $f2, $f3, $fn) = split / /;
      # accept binarized and compact lexro models
      my $abs = absolutize_existing($fn, $ini, "", ".binlexr.idx", ".minlexr");
      $_ = "$f1 $f2 $f3 $abs\n";
    }
  }
  print $_;
}
close $inih if $ini ne "-";

# Private helper of the loop above.
# Makes $fn absolute (relative to the location of $origin) and dies unless at
# least one of "<absolute path><suffix>" is a non-empty file, for the given
# list of suffixes ("" stands for the path itself). Returns the absolute path
# without any suffix.
sub absolutize_existing {
  my ($fn, $origin, @suffixes) = @_;
  my $abs = ensure_absolute($fn, $origin);
  my @candidates = map { $abs.$_ } @suffixes;
  die "File not found or empty: $fn (searched for ".join(" or ", @candidates).")"
    unless grep { -s $_ } @candidates;
  return $abs;
}

# Runs a command via system(), logging it and its outcome to STDERR.
#
# Arguments: the command and its arguments, passed to system() unchanged.
# Returns:   true if the command exited with status 0, false if it exited
#            with a non-zero status or was killed by a signal.
# Exits the whole script with status 1 if the command could not be started.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  if ($? == -1) {
      print STDERR "Failed to execute: @_\n  $!\n";
      exit(1);
  }
  elsif ($? & 127) {
      # The command text is passed as a %s argument instead of being
      # interpolated into the format, so a '%' in it cannot garble the message.
      printf STDERR "Execution of: %s\n  died with signal %d, %s coredump\n",
          "@_", ($? & 127),  ($? & 128) ? 'with' : 'without';
      # Explicit failure: without this the sub would return printf's (true)
      # result and a killed child would look like a success to the caller.
      return 0;
  }
  else {
    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return ! $exitcode;
  }
}

# Turns $target into an absolute path, interpreting a relative $target as
# relative to the directory that contains $originfile (which itself may be
# given relative to the current working directory).
#
# Arguments: $target     - the path to absolutize
#            $originfile - the file whose location anchors relative targets
# Returns:   the absolutized path (targets starting with "/" or "~" are
#            returned unchanged by ensure_relative_to_origin).
# Dies if the current working directory cannot be determined.
sub ensure_absolute {
  my $target = shift;
  my $originfile = shift;

  # pawd is tried first, with pwd as the fallback. When pawd is not installed
  # the shell still starts, so the backticks yield an empty string rather than
  # undef; blank output therefore has to count as failure too, otherwise the
  # fallback never runs and everything gets resolved against "/".
  my $cwd = `pawd 2> /dev/null`;
  $cwd = `pwd` if !defined $cwd || $cwd !~ /\S/; # not everyone has pawd!
  die "Failed to absolutize $target. Failing to get cwd!"
    if !defined $cwd || $cwd !~ /\S/;
  chomp $cwd;
  $cwd.="/";

  # First anchor the origin file in the cwd, then the target next to the origin.
  my $absorigin = ensure_relative_to_origin($originfile, $cwd);
  return ensure_relative_to_origin($target, $absorigin);
}

# Resolves $target against the directory part of $originfile.
#
# A target beginning with "/" or "~" is returned unchanged. Otherwise
# everything after the last "/" of $originfile is dropped, the target is
# appended, runs of slashes are collapsed and "./" components following a
# slash are removed.
sub ensure_relative_to_origin {
  my ($target, $originfile) = @_;

  # Nothing to do for paths that are already absolute (or home-relative).
  return $target if $target =~ m{^/|^~};

  # Keep only the directory in which the origin resides.
  (my $origin_dir = $originfile) =~ s{[^/]*$}{};

  my $joined = join("/", $origin_dir, $target);
  $joined =~ s{/+}{/}g;         # "a//b"  -> "a/b"
  $joined =~ s{/(\./)+}{/}g;    # "a/./b" -> "a/b"
  return $joined;
}

# Opens $f for reading as UTF-8 text, transparently decompressing gzip and
# bzip2 files through zcat / bzcat.
#
# Arguments: $f - path of the file to read, or "-" for standard input
# Returns:   a readable handle (the STDIN glob for "-")
# Dies if the file does not exist or cannot be opened.
sub my_open {
  my $f = shift;
  if ($f eq "-") {
    binmode(STDIN, ":utf8");
    return *STDIN;
  }

  die "Not found: $f" if ! -e $f;

  # Ask file(1) what the content looks like. The list form of open bypasses the
  # shell, so quotes or other metacharacters in the file name are harmless.
  # If file(1) cannot be run, the decision falls back to the extension alone.
  my $ft = "";
  if (open my $probe, "-|", "file", $f) {
    local $/;
    my $verdict = <$probe>;
    $ft = $verdict if defined $verdict;
    close $probe;
  }

  # $opn is only a human-readable description for the error message; the real
  # open uses an explicit mode so the file name is never parsed for mode
  # characters or stripped of surrounding whitespace.
  my ($opn, $mode, @args);
  # file might not recognize some files!
  if ($f =~ /\.gz$/ || $ft =~ /gzip compressed data/) {
    $opn = "zcat '$f' |";
    ($mode, @args) = ("-|", "zcat", $f);
  } elsif ($f =~ /\.bz2$/ || $ft =~ /bzip2 compressed data/) {
    $opn = "bzcat '$f' |";
    ($mode, @args) = ("-|", "bzcat", $f);
  } else {
    $opn = "$f";
    ($mode, @args) = ("<", $f);
  }
  open(my $hdl, $mode, @args) or die "Can't open '$opn': $!";
  binmode $hdl, ":utf8";
  return $hdl;
}