Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tomáš Fulajtár <ful_tom@yahoo.com> 2015-08-27 16:15:32 +0300
committer: Tomáš Fulajtár <ful_tom@yahoo.com> 2015-08-27 16:15:32 +0300
commit: 1a26cb84140bde842b0b60c6888e7f169536e849 (patch)
tree: 7cbbce75ee1eae0cbe73eaffe8980dd3ef4b3596 /scripts/analysis
parent: a16d118a95921a8a5913940c21ba8b81d344a11c (diff)
Added simple support for factored systems.
Diffstat (limited to 'scripts/analysis')
-rwxr-xr-x scripts/analysis/oov.pl 7
1 file changed, 7 insertions, 0 deletions
diff --git a/scripts/analysis/oov.pl b/scripts/analysis/oov.pl
index 9756887c9..5228f0f45 100755
--- a/scripts/analysis/oov.pl
+++ b/scripts/analysis/oov.pl
@@ -176,6 +176,13 @@ sub ngrams {
return { md5(encode_utf8($sent)) => 1 };
} else {
my @words = split /\s+/, $sent;
+
+ #factors
+ if ( $sent =~ m/[|]/) {
+ my $use_index = 0; # default factor is the first one
+ @words = map { ( split /[|]/, $_ ) [$use_index] } @words;
+ }
+
my $out;
if ($n == 1) {
foreach my $w (@words) {