diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2018-11-09 15:58:22 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2018-11-09 15:58:22 +0300 |
commit | a70086c1e6ad3bd3357f9f890e54bb46b9c48ac8 (patch) | |
tree | 601d7caf4935e449a3034321230dafbca768e077 | |
parent | 2451c469603bd297a2f52369c2d57b2fab835ef4 (diff) |
python wrapper works
-rw-r--r-- | scripts/tokenizer/python-wrapper/__init__.py | 2 | ||||
-rw-r--r-- | scripts/tokenizer/python-wrapper/tokenizer.py | 15 |
2 files changed, 9 insertions, 8 deletions
```diff
diff --git a/scripts/tokenizer/python-wrapper/__init__.py b/scripts/tokenizer/python-wrapper/__init__.py
index 8ff517176..d815a91dc 100644
--- a/scripts/tokenizer/python-wrapper/__init__.py
+++ b/scripts/tokenizer/python-wrapper/__init__.py
@@ -29,3 +29,5 @@ __all__ = [
     "MosesSentenceSplitter",
     "MosesPunctuationNormalizer",
 ]
+
+
diff --git a/scripts/tokenizer/python-wrapper/tokenizer.py b/scripts/tokenizer/python-wrapper/tokenizer.py
index eb5aec3dc..b3af06647 100644
--- a/scripts/tokenizer/python-wrapper/tokenizer.py
+++ b/scripts/tokenizer/python-wrapper/tokenizer.py
@@ -41,17 +41,17 @@ class MosesTokenizer(ToolWrapper):
     ['Hello', 'World', '!']
     """
 
-    def __init__(self, lang="en", old_version=False):
+    def __init__(self, lang="en"):
         self.lang = lang
         program = path.join(
             path.dirname(__file__),
-            "tokenizer-" + ("v1.0" if old_version else "v1.1") + ".perl"
+            "../tokenizer.perl"
         )
         argv = ["perl", program, "-q", "-l", self.lang]
-        if not old_version:
-            # -b = disable output buffering
-            # -a = aggressive hyphen splitting
-            argv.extend(["-b", "-a"])
+
+        # -b = disable output buffering
+        # -a = aggressive hyphen splitting
+        argv.extend(["-b", "-a"])
         super().__init__(argv)
 
     def __str__(self):
@@ -80,8 +80,7 @@ def main():
     if not args["<lang>"]:
         sys.exit(0)
     tokenize = MosesTokenizer(
-        args["<lang>"],
-        old_version=args["--old"],
+        args["<lang>"]
     )
     inputfile = openfile(args["<inputfile>"])
     outputfile = openfile(args["<outputfile>"], "wt")
```