diff options
author | Taku Kudo <taku@google.com> | 2018-07-26 10:28:32 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-07-26 10:28:32 +0300 |
commit | 5dac4835fa47b2510e07ea0e7bd205b1c99a3835 (patch) | |
tree | 11fccfaafbc159d9bc6ae68e6801c678c879cb96 /src/spm_train_main.cc | |
parent | 9c3ea57a98642249259bcdffa57374954ff8e4ae (diff) |
Added --unk_surface option to allow user to change unknown surface string.
Diffstat (limited to 'src/spm_train_main.cc')
-rw-r--r-- | src/spm_train_main.cc | 4 |
1 file changed, 4 insertions, 0 deletions
diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc
index a9890a1..32e73a0 100644
--- a/src/spm_train_main.cc
+++ b/src/spm_train_main.cc
@@ -83,6 +83,9 @@ DEFINE_int32(eos_id, kDefaultTrainerSpec.eos_id(),
              "Override EOS (</s>) id. Set -1 to disable EOS.");
 DEFINE_int32(pad_id, kDefaultTrainerSpec.pad_id(),
              "Override PAD (<pad>) id. Set -1 to disable PAD.");
+DEFINE_string(unk_surface, kDefaultTrainerSpec.unk_surface(),
+              "Dummy surface string for <unk>. In decoding <unk> is decoded to "
+              "`unk_surface`.");
 
 int main(int argc, char *argv[]) {
   sentencepiece::flags::ParseCommandLineFlags(argc, argv);
@@ -125,6 +128,7 @@ int main(int argc, char *argv[]) {
   SetTrainerSpecFromFlag(bos_id);
   SetTrainerSpecFromFlag(eos_id);
   SetTrainerSpecFromFlag(pad_id);
+  SetTrainerSpecFromFlag(unk_surface);
   SetRepeatedTrainerSpecFromFlag(input);
   SetRepeatedTrainerSpecFromFlag(accept_language);
   SetRepeatedTrainerSpecFromFlag(control_symbols);