Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2018-11-09 15:58:22 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2018-11-09 15:58:22 +0300
commit: a70086c1e6ad3bd3357f9f890e54bb46b9c48ac8 (patch)
tree: 601d7caf4935e449a3034321230dafbca768e077
parent: 2451c469603bd297a2f52369c2d57b2fab835ef4 (diff)
python wrapper works
-rw-r--r-- scripts/tokenizer/python-wrapper/__init__.py 2
-rw-r--r-- scripts/tokenizer/python-wrapper/tokenizer.py 15
2 files changed, 9 insertions, 8 deletions
diff --git a/scripts/tokenizer/python-wrapper/__init__.py b/scripts/tokenizer/python-wrapper/__init__.py
index 8ff517176..d815a91dc 100644
--- a/scripts/tokenizer/python-wrapper/__init__.py
+++ b/scripts/tokenizer/python-wrapper/__init__.py
@@ -29,3 +29,5 @@ __all__ = [
"MosesSentenceSplitter",
"MosesPunctuationNormalizer",
]
+
+
diff --git a/scripts/tokenizer/python-wrapper/tokenizer.py b/scripts/tokenizer/python-wrapper/tokenizer.py
index eb5aec3dc..b3af06647 100644
--- a/scripts/tokenizer/python-wrapper/tokenizer.py
+++ b/scripts/tokenizer/python-wrapper/tokenizer.py
@@ -41,17 +41,17 @@ class MosesTokenizer(ToolWrapper):
['Hello', 'World', '!']
"""
- def __init__(self, lang="en", old_version=False):
+ def __init__(self, lang="en"):
self.lang = lang
program = path.join(
path.dirname(__file__),
- "tokenizer-" + ("v1.0" if old_version else "v1.1") + ".perl"
+ "../tokenizer.perl"
)
argv = ["perl", program, "-q", "-l", self.lang]
- if not old_version:
- # -b = disable output buffering
- # -a = aggressive hyphen splitting
- argv.extend(["-b", "-a"])
+
+ # -b = disable output buffering
+ # -a = aggressive hyphen splitting
+ argv.extend(["-b", "-a"])
super().__init__(argv)
def __str__(self):
@@ -80,8 +80,7 @@ def main():
if not args["<lang>"]:
sys.exit(0)
tokenize = MosesTokenizer(
- args["<lang>"],
- old_version=args["--old"],
+ args["<lang>"]
)
inputfile = openfile(args["<inputfile>"])
outputfile = openfile(args["<outputfile>"], "wt")