Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/scripts/merge_alignment.py')
-rwxr-xr-x mgizapp/scripts/merge_alignment.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/mgizapp/scripts/merge_alignment.py b/mgizapp/scripts/merge_alignment.py
index 626bc68..c4e8b95 100755
--- a/mgizapp/scripts/merge_alignment.py
+++ b/mgizapp/scripts/merge_alignment.py
@@ -5,8 +5,16 @@
# produced by MGIZA, which has sentence IDs, and every file is
# ordered inside
+from __future__ import unicode_literals
import sys
import re
+import codecs
+import io
+
+if sys.version_info < (3,0,0):
+ sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
+ sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
+ sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
if len(sys.argv)<2:
sys.stderr.write("Provide me the file names (at least 2)\n");
@@ -21,7 +29,7 @@ sents = [];
done = [];
for i in range(1,len(sys.argv)):
- files.append(open(sys.argv[i],"r"));
+ files.append(io.open(sys.argv[i],"r", encoding="UTF-8"));
ids.append(0);
sents.append("");
done.append(False);