Welcome to mirror list, hosted at ThFree Co, Russian Federation.

threshold-filter.perl « training « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: a23fb8b5ca2abc2ca4f77ace5ac024c79c37477d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env perl 

use warnings;
use strict;

# Filter a phrase table read from STDIN: a line is copied to STDOUT only if
# every selected score in its third " ||| "-separated field reaches the
# configured minimum. Progress and a final summary are written to STDERR.
#
# Usage: threshold-filter.perl SPEC < phrase-table > filtered-phrase-table
#   SPEC is either a bare number (legacy mode, applied to scores 0 and 2) or
#   a comma-separated list of "field:threshold" pairs, where field is the
#   0-based index of a score within the score field.

# Maps a 0-based score index to the minimum value that score must reach.
my %MIN_SCORE;

# Treat a missing argument as an empty spec so that it falls through to the
# "please specify threshold" error below without uninitialized-value warnings.
my $spec = defined $ARGV[0] ? $ARGV[0] : '';

# legacy: same threshold for direct and indirect phrase translation probabilities
if ($spec =~ /^[\d\.]+$/) {
  # Both fields take the single threshold given as the first argument.
  $MIN_SCORE{0} = $spec;
  $MIN_SCORE{2} = $spec;
}
# advanced: field:threshold,field:threshold
# recommended use is "2:0.0001"
else {
  foreach my $field_spec (split(/,/,$spec)) {
    my ($id,$score) = split(/:/,$field_spec);
    # A missing threshold (no ":" in the item) or a threshold of zero is
    # rejected as a malformed spec.
    if (!defined $score || $score == 0) {
      die("error in spec $field_spec (full spec $spec)");
    }
    $MIN_SCORE{$id} = $score;
    print STDERR "score $id must be at least $score\n";
  }
}
die("please specify threshold (e.g., 0.0001)") unless scalar keys %MIN_SCORE;

my ($filtered,$total) = (0,0);
while(my $line = <STDIN>) {
  # Fields are separated by " ||| "; the third field holds the
  # space-separated scores.
  my @ITEM = split(/ \|\|\| /,$line);
  my @SCORE = split(/ /,$ITEM[2]);
  $total++;
  my $filter_this = 0;
  foreach my $key (keys %MIN_SCORE) {
    if ($SCORE[$key] < $MIN_SCORE{$key}) {
      # One score below its minimum is enough to drop the line.
      $filter_this = 1;
      last;
    }
  }
  if ($filter_this) {
    $filtered++;
    next;
  }
  print $line;
}

print STDERR "filtered out $filtered of $total phrase pairs.\n";