diff options
author | Jean Senellart <jean@senellart.com> | 2017-12-10 15:55:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-10 15:55:29 +0300 |
commit | 8247820ae68249ec0bcb9736a6d90ac667daaaa2 (patch) | |
tree | a2dde4383beabda982e5502797e00babddd52765 /onmt | |
parent | dce6a169719ddba649f60cf7e7ec7b6da91b41bc (diff) |
Fix450 (#454)
* fix #450 - fully integrate hook in translate.lua and drop normalizer
Diffstat (limited to 'onmt')
-rw-r--r-- | onmt/translate/Translator.lua | 55 |
1 files changed, 55 insertions, 0 deletions
-- Belongs in onmt/translate/Translator.lua, between Translator.declareOpts
-- and Translator:__init.

--[[ Expand the preprocessing and tokenizer options of `cmd` into per-side variants.

  Every option listed in `cmd.helplines` under a block whose title starts with
  "MPreprocessing options" or "Tokenizer options" is renamed:

  * in the help lines, `-name` becomes `-<blk>_<side>name`, where `<blk>` is the
    lower-cased first three letters of the block title (`mpr` / `tok`) and
    `<side>` is `` for 'monotext', `tgt_` for 'feattext' and `{src,tgt}_`
    otherwise;
  * in `cmd.options`, the entry `-name` is removed and replaced by deep copies
    registered as `-<blk>_name` ('monotext'), `-<blk>_src_name` and
    `-<blk>_tgt_name` ('bitext'), or `-<blk>_tgt_name` only (any other value).

  The `-mode` option, when present, is first redefined with default `space`.

Parameters:

  * `cmd` - command line object exposing `helplines` (sequence of strings and
    option tables carrying a `key` field) and `options` (table indexed by option
    key). NOTE(review): presumably a torch.CmdLine-like object - confirm.
  * `dataType` - 'monotext', 'bitext', 'feattext', or anything else.

Modifies `cmd` in place and returns nothing.
]]
function Translator.expandOpts(cmd, dataType)
  -- Original option key -> block prefix ('mpr' or 'tok') of each renamed option.
  local renamedPrefix = {}
  local currentBlock

  -- Side marker spliced into the keys displayed by the help lines.
  local helpSide = "{src,tgt}_"
  if dataType == "monotext" then helpSide = "" end
  if dataType == "feattext" then helpSide = "tgt_" end

  for _, entry in ipairs(cmd.helplines) do
    if type(entry) == "string" then
      -- A text line containing " options" opens a new block; any other text
      -- line leaves the current block unchanged. Plain search: the needle has
      -- no pattern magic, so this matches exactly like the pattern form.
      local pos = entry:find(" options", 1, true)
      if pos then
        currentBlock = entry:sub(1, pos - 1)
      end
    elseif currentBlock == "MPreprocessing" or currentBlock == "Tokenizer" then
      local blockPrefix = currentBlock:sub(1, 3):lower()
      -- Record under the original key before it is overwritten below.
      renamedPrefix[entry.key] = blockPrefix
      entry.key = "-" .. blockPrefix .. "_" .. helpSide .. entry.key:sub(2)
    end
  end

  -- Redefine '-mode' so that it defaults to 'space'. Guarded: without a
  -- declared '-mode' option the lookup of `.key` would raise on nil.
  -- NOTE(review): `key` is read after the loop above, so it holds the renamed
  -- help key if `helplines` and `options` share the same entry table - confirm.
  local modeOption = cmd.options['-mode']
  if modeOption then
    cmd.options['-mode'] = {
      type = "string",
      key = modeOption.key,
      default = 'space',
      help = [[Define how aggressive should the tokenization be. `space` is space-tokenization.]],
      meta = {
        enum = {'conservative', 'aggressive', 'space'}
      }
    }
  end

  -- New entries are collected aside and inserted afterwards: clearing existing
  -- fields during a pairs() traversal is allowed, adding new ones is not.
  local expanded = {}
  for key, option in pairs(cmd.options) do
    local blockPrefix = renamedPrefix[key]
    if blockPrefix then
      cmd.options[key] = nil
      local name = key:sub(2)
      if dataType == 'monotext' then
        expanded['-' .. blockPrefix .. '_' .. name] = onmt.utils.Table.deepCopy(option)
      else
        if dataType == 'bitext' then
          expanded['-' .. blockPrefix .. '_src_' .. name] = onmt.utils.Table.deepCopy(option)
        end
        expanded['-' .. blockPrefix .. '_tgt_' .. name] = onmt.utils.Table.deepCopy(option)
      end
    end
  end
  for key, option in pairs(expanded) do
    cmd.options[key] = option
  end
end