Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Koehn <phi@jhu.edu>, 2014-12-21 04:37:05 +0300
committer: Philipp Koehn <phi@jhu.edu>, 2014-12-21 04:37:05 +0300
commit: 59fdb3d99c5b769f75ca9e4f5a87fe43f1f9c27e (patch)
tree: 0540f972e64eac48019415ff7ecf50c4f5b16035
parent: 831f947874250c69f4fb0adc6d20f06f12f2db84 (diff)
same spec for dedicated script as for train-model.perl and filter-model-given-input.pl
-rwxr-xr-x scripts/training/threshold-filter.perl 33
1 files changed, 28 insertions, 5 deletions
diff --git a/scripts/training/threshold-filter.perl b/scripts/training/threshold-filter.perl
index 55f408d9b..1d5cfbbb4 100755
--- a/scripts/training/threshold-filter.perl
+++ b/scripts/training/threshold-filter.perl
@@ -2,17 +2,40 @@
use strict;
-my $THRESHOLD = $ARGV[0];
-die("please specify threshold (e.g., 0.00001)") unless defined($THRESHOLD) || $THRESHOLD > 0;
+my %MIN_SCORE;
+# legacy: same threshold for direct and indirect phrase translation probabilities
+if ($ARGV[0] =~ /^[\d\.]+$/) {
+ $MIN_SCORE{0} = $ARGV[0];
+ $MIN_SCORE{2} = $ARGV[2];
+}
+# advanced: field:threshold,field:threshold
+# recommended use is "2:0.0001"
+else {
+ foreach (split(/,/,$ARGV[0])) {
+ my ($id,$score) = split(/:/);
+ if ($score == 0) {
+ die("error in spec $_ (full spec $ARGV[0])");
+ }
+ $MIN_SCORE{$id} = $score;
+ print STDERR "score $id must be at least $score\n";
+ }
+}
+die("please specify threshold (e.g., 0.0001)") unless scalar keys %MIN_SCORE;
my ($filtered,$total) = (0,0);
while(my $line = <STDIN>) {
my @ITEM = split(/ \|\|\| /,$line);
my @SCORE = split(/ /,$ITEM[2]);
$total++;
- if ($SCORE[0] < $THRESHOLD || $SCORE[2] < $THRESHOLD) {
- $filtered++;
- next;
+ my $filter_this = 0;
+ foreach my $key (keys %MIN_SCORE) {
+ if ($SCORE[$key] < $MIN_SCORE{$key}) {
+ $filter_this++;
+ }
+ }
+ if ($filter_this) {
+ $filtered++;
+ next;
}
print $line;
}