Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/analysis')
-rwxr-xr-x  scripts/analysis/suspicious_tokenization.pl  6
1 file changed, 5 insertions, 1 deletion
diff --git a/scripts/analysis/suspicious_tokenization.pl b/scripts/analysis/suspicious_tokenization.pl
index f7ca3c60d..29e32d271 100755
--- a/scripts/analysis/suspicious_tokenization.pl
+++ b/scripts/analysis/suspicious_tokenization.pl
@@ -49,10 +49,14 @@ foreach my $ngr (keys %$ngrams) {
$report->{$ngr}->{"tok"} = $tokcnt;
$report->{$ngr}->{"untok"} = $untokcnt;
$report->{$ngr}->{"diff"} = abs($untokcnt-$tokcnt);
+ $report->{$ngr}->{"sum"} = $untokcnt+$tokcnt;
}
# Report
-foreach my $ngr (sort {$report->{$a}->{"diff"} <=> $report->{$b}->{"diff"}}
+foreach my $ngr (sort {
+ $report->{$a}->{"diff"} <=> $report->{$b}->{"diff"}
+ || $report->{$b}->{"sum"} <=> $report->{$a}->{"sum"}
+ }
keys %$report) {
print "$ngr\t$report->{$ngr}->{untok}\t$report->{$ngr}->{tok}\t$report->{$ngr}->{diff}\n";
}