Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2012-06-26 02:37:59 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2012-06-26 02:37:59 +0400
commit 135e38d355b4bafbb54b4fc29798da548e762a55 (patch)
tree ff938d57fdaf0264b03c4cdb1f5e33803615b482 /scripts
parent 765e789c0c904906098427af80f9520adff78c17 (diff)
escape bar character with proper html escape sequence
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/tokenizer/deescape-special-chars.perl 3
-rwxr-xr-x scripts/tokenizer/detokenizer.perl 3
-rwxr-xr-x scripts/tokenizer/escape-special-chars.perl 2
-rwxr-xr-x scripts/tokenizer/tokenizer.perl 2
4 files changed, 6 insertions, 4 deletions
diff --git a/scripts/tokenizer/deescape-special-chars.perl b/scripts/tokenizer/deescape-special-chars.perl
index 345555990..7dc6bc539 100755
--- a/scripts/tokenizer/deescape-special-chars.perl
+++ b/scripts/tokenizer/deescape-special-chars.perl
@@ -3,7 +3,8 @@
use strict;
while(<STDIN>) {
- s/\&bar;/\|/g; # factor separator
+ s/\&bar;/\|/g; # factor separator (legacy)
+ s/\&#124;/\|/g; # factor separator
s/\&lt;/\</g; # xml
s/\&gt;/\>/g; # xml
s/\&bra;/\[/g; # syntax non-terminal (legacy)
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 8233b419c..488ff7b5a 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -66,7 +66,8 @@ sub detokenize {
$text = " $text ";
$text =~ s/ \@\-\@ /-/g;
# de-escape special chars
- $text =~ s/\&bar;/\|/g; # factor separator
+ $text =~ s/\&bar;/\|/g; # factor separator (legacy)
+ $text =~ s/\&#124;/\|/g; # factor separator
$text =~ s/\&lt;/\</g; # xml
$text =~ s/\&gt;/\>/g; # xml
$text =~ s/\&bra;/\[/g; # syntax non-terminal (legacy)
diff --git a/scripts/tokenizer/escape-special-chars.perl b/scripts/tokenizer/escape-special-chars.perl
index 5d9690c04..d0bf75796 100755
--- a/scripts/tokenizer/escape-special-chars.perl
+++ b/scripts/tokenizer/escape-special-chars.perl
@@ -13,7 +13,7 @@ while(<STDIN>) {
# special characters in moses
s/\&/\&amp;/g; # escape escape
- s/\|/\&bar;/g; # factor separator
+ s/\|/\&#124;/g; # factor separator
s/\</\&lt;/g; # xml
s/\>/\&gt;/g; # xml
s/\'/\&apos;/g; # xml
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index 0cb713740..6e7651542 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -150,7 +150,7 @@ sub tokenize {
#escape special chars
$text =~ s/\&/\&amp;/g; # escape escape
- $text =~ s/\|/\&bar;/g; # factor separator
+ $text =~ s/\|/\&#124;/g; # factor separator
$text =~ s/\</\&lt;/g; # xml
$text =~ s/\>/\&gt;/g; # xml
$text =~ s/\'/\&apos;/g; # xml