Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: alvations <alvations@gmail.com> 2019-01-03 15:51:27 +0300
committer: GitHub <noreply@github.com> 2019-01-03 15:51:27 +0300
commit: 8fdbc74bbf9253fd0e442231d18762e5c67213b4 (patch)
tree: fc8ce75ec13a4e952e022f40c649e07958570217
parent: dfbb17e549d4cb4ece452c7224ae47a590b7a4da (diff)
Reverting split_xml()
-rwxr-xr-x scripts/recaser/train-truecaser.perl 16
1 files changed, 2 insertions, 14 deletions
diff --git a/scripts/recaser/train-truecaser.perl b/scripts/recaser/train-truecaser.perl
index 589ee43e3..94ddbf2fa 100755
--- a/scripts/recaser/train-truecaser.perl
+++ b/scripts/recaser/train-truecaser.perl
@@ -103,20 +103,8 @@ sub split_xml {
while($line =~ /\S/) {
# XML tag
if ($line =~ /^\s*(<\S[^>]*>)(.*)$/) {
- my $potential_xml = $1;
- my $line_next = $2;
- # exception for factor that is an XML tag
- if ($line =~ /^\S/ && scalar(@WORD)>0 && $WORD[$i-1] =~ /\|$/) {
- $WORD[$i-1] .= $potential_xml;
- if ($line_next =~ /^(\|+)(.*)$/) {
- $WORD[$i-1] .= $1;
- $line_next = $2;
- }
- }
- else {
- $MARKUP[$i] .= $potential_xml." ";
- }
- $line = $line_next;
+ $MARKUP[$i] .= $1." ";
+ $line = $2;
}
# non-XML text
elsif ($line =~ /^\s*([^\s<>]+)(.*)$/) {