diff options
author | Tomáš Fulajtár <ful_tom@yahoo.com> | 2015-08-27 16:15:32 +0300 |
---|---|---|
committer | Tomáš Fulajtár <ful_tom@yahoo.com> | 2015-08-27 16:15:32 +0300 |
commit | 1a26cb84140bde842b0b60c6888e7f169536e849 (patch) | |
tree | 7cbbce75ee1eae0cbe73eaffe8980dd3ef4b3596 /scripts/analysis | |
parent | a16d118a95921a8a5913940c21ba8b81d344a11c (diff) |
Added simple support for factored systems.
Diffstat (limited to 'scripts/analysis')
-rwxr-xr-x | scripts/analysis/oov.pl | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/scripts/analysis/oov.pl b/scripts/analysis/oov.pl index 9756887c9..5228f0f45 100755 --- a/scripts/analysis/oov.pl +++ b/scripts/analysis/oov.pl @@ -176,6 +176,13 @@ sub ngrams { return { md5(encode_utf8($sent)) => 1 }; } else { my @words = split /\s+/, $sent; + + #factors + if ( $sent =~ m/[|]/) { + my $use_index = 0; # default factor is the first one + @words = map { ( split /[|]/, $_ ) [$use_index] } @words; + } + my $out; if ($n == 1) { foreach my $w (@words) { |