diff options
author | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-11-14 20:52:39 +0300 |
---|---|---|
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-11-14 20:52:39 +0300 |
commit | c3be9fdcf82cb5a7a0ecc2d5ad993d0d73fbdf4b (patch) | |
tree | 5840526312634c4ae2fe5259f88965e5476da15b /mgizapp/scripts/sntpostproc.py | |
parent | 15713b294d4864814b0c19a7836ae9c6ad496705 (diff) |
use UTF-8 encoding in python scripts
Diffstat (limited to 'mgizapp/scripts/sntpostproc.py')
-rwxr-xr-x | mgizapp/scripts/sntpostproc.py | 18 |
1 files changed, 14 insertions, 4 deletions
```diff
diff --git a/mgizapp/scripts/sntpostproc.py b/mgizapp/scripts/sntpostproc.py
index b3bf528..f2f1f35 100755
--- a/mgizapp/scripts/sntpostproc.py
+++ b/mgizapp/scripts/sntpostproc.py
@@ -3,15 +3,25 @@
 # This script post process the snt file -- either in single-line format or in multi-line format
 # The output, however, will always be in single-line format
 
+from __future__ import unicode_literals
 from sys import *
 from optparse import OptionParser
 import re;
+import codecs
+import io
+
 usage = """
 The script post process the snt file, the input could be single-line snt
 file or multi-line, (triple line) and can insert sentence weight to the
 file (-w) or add partial alignment to the file (-a)
 Usage %prog -s sntfile -w weight-file -a alignfile -o outputfile
 """
+
+if sys.version_info < (3,0,0):
+    sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
+    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
+    sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)
+
 parser = OptionParser(usage=usage)
 
 
@@ -37,21 +47,21 @@ if options.snt == None:
     parser.print_help();
     exit();
 else:
-    sfile = open(options.snt,"r");
+    sfile = io.open(options.snt,"r", encoding="UTF-8");
 
 if options.output=="-":
     ofile = stdout;
 else:
-    ofile = open(options.output,"w");
+    ofile = io.open(options.output,"w", encoding="UTF-8");
 
 wfile = None;
 
 if options.weight <> None:
-    wfile = open(options.weight,"r");
+    wfile = io.open(options.weight,"r", encoding="UTF-8");
 
 afile = None;
 if options.align <> None:
-    afile = open(options.align,"r");
+    afile = io.open(options.align,"r", encoding="UTF-8");
 
 rr = re.compile("[\\|\\#\\*]");
 wt = 0.0;
```