Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/OpenNMT/OpenNMT.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/onmt
diff options
context:
space:
mode:
author: Jean Senellart <jean@senellart.com> 2017-12-10 15:55:29 +0300
committer: GitHub <noreply@github.com> 2017-12-10 15:55:29 +0300
commit: 8247820ae68249ec0bcb9736a6d90ac667daaaa2 (patch)
tree: a2dde4383beabda982e5502797e00babddd52765 /onmt
parent: dce6a169719ddba649f60cf7e7ec7b6da91b41bc (diff)
Fix450 (#454)
* fix #450 - fully integrate hook in translate.lua and drop normalizer
Diffstat (limited to 'onmt')
-rw-r--r-- onmt/translate/Translator.lua | 55
1 files changed, 55 insertions, 0 deletions
diff --git a/onmt/translate/Translator.lua b/onmt/translate/Translator.lua
index dd6059f0..5a442c6c 100644
--- a/onmt/translate/Translator.lua
+++ b/onmt/translate/Translator.lua
@@ -133,6 +133,61 @@ function Translator.declareOpts(cmd)
cmd:setCmdLineOptions(options, 'Translator')
end
+function Translator.expandOpts(cmd, dataType) -- Renames the options listed under the "MPreprocessing" and "Tokenizer" help blocks to '-<blk>_[src_|tgt_]<name>' according to dataType; mutates the records in cmd.helplines and cmd.options in place and returns nothing.
+ local torenameOpts = {}; -- original option key -> 3-letter lowercase block prefix ("mpr" or "tok")
+ local current_block; -- name of the most recent "<name> options" heading seen in cmd.helplines
+ local pref = "{src,tgt}_" -- side marker spliced into each rewritten v.key; the default is the literal "{src,tgt}_" (presumably so help output shows both variants, TODO confirm)
+ if dataType == "monotext" then pref = "" end -- monotext: no side marker
+ if dataType == "feattext" then pref = "tgt_" end -- feattext: target side only
+ for i, v in ipairs(cmd.helplines) do -- entries are either strings (treated as headings) or values with a .key field
+ if type(v) == "string" then
+ local p = v:find(" options") -- start position of " options" in the heading, nil when absent
+ if p then
+ current_block = v:sub(1,p-1); -- the text before " options" is the block name
+ if current_block == "MPreprocessing" or current_block == "Tokenizer" then
+ cmd.helplines[i] = cmd.helplines[i] -- NOTE(review): self-assignment, appears to have no effect as written
+ end
+ end
+ else
+ if current_block == "MPreprocessing" or current_block == "Tokenizer" then -- only options under these two blocks are renamed
+ torenameOpts[v.key] = current_block:sub(1,3):lower() -- record the old key before it is overwritten on the next line
+ v.key="-"..current_block:sub(1,3):lower().."_"..pref..v.key:sub(2) -- v.key:sub(2) drops the first character of the old key (presumably the leading '-')
+ end
+ end
+ end
+
+ cmd.options['-mode'] = -- replaces the whole '-mode' record; raises an error if cmd.options['-mode'] is nil, because its .key is read below
+ {
+ type= "string",
+ key= cmd.options['-mode'].key, -- keeps the key field of the record being replaced
+ default= 'space',
+ help= [[Define how aggressive should the tokenization be. `space` is space-tokenization.]],
+ meta= {
+ enum = {'conservative', 'aggressive', 'space'}
+ }
+ }
+
+ local newOpts = {} -- renamed entries are staged here so that no new key is added to cmd.options during the pairs traversal
+ for k, v in pairs(cmd.options) do
+ if torenameOpts[k] then -- k was recorded by the first loop
+ cmd.options[k] = nil -- drop the old key (clearing an existing field during traversal is allowed)
+ if dataType == 'monotext' then
+ local ksrc = '-'..torenameOpts[k]..'_'..k:sub(2) -- monotext: '-<blk>_<name>'
+ newOpts[ksrc] = onmt.utils.Table.deepCopy(v) -- deepCopy is defined elsewhere; presumably yields an independent copy (verify)
+ elseif dataType == 'bitext' then
+ local ksrc = '-'..torenameOpts[k]..'_src_'..k:sub(2) -- bitext: '-<blk>_src_<name>'
+ newOpts[ksrc] = onmt.utils.Table.deepCopy(v)
+ end
+ if dataType ~= 'monotext' then -- every non-monotext dataType also gets a target-side entry
+ local ktgt = '-'..torenameOpts[k]..'_tgt_'..k:sub(2) -- '-<blk>_tgt_<name>'
+ newOpts[ktgt] = onmt.utils.Table.deepCopy(v)
+ end
+ end
+ end
+ for k, v in pairs(newOpts) do -- merge the staged entries back into cmd.options
+ cmd.options[k] = v
+ end
+end
function Translator:__init(args, model, dicts)
self.args = args