Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-07-26 10:28:32 +0300
committer: Taku Kudo <taku@google.com> 2018-07-26 10:28:32 +0300
commit: 5dac4835fa47b2510e07ea0e7bd205b1c99a3835 (patch)
tree: 11fccfaafbc159d9bc6ae68e6801c678c879cb96 /src/spm_train_main.cc
parent: 9c3ea57a98642249259bcdffa57374954ff8e4ae (diff)
Added --unk_surface option to allow user to change unknown surface string.
Diffstat (limited to 'src/spm_train_main.cc')
-rw-r--r-- src/spm_train_main.cc | 4
1 file changed, 4 insertions, 0 deletions
diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc
index a9890a1..32e73a0 100644
--- a/src/spm_train_main.cc
+++ b/src/spm_train_main.cc
@@ -83,6 +83,9 @@ DEFINE_int32(eos_id, kDefaultTrainerSpec.eos_id(),
"Override EOS (</s>) id. Set -1 to disable EOS.");
DEFINE_int32(pad_id, kDefaultTrainerSpec.pad_id(),
"Override PAD (<pad>) id. Set -1 to disable PAD.");
+DEFINE_string(unk_surface, kDefaultTrainerSpec.unk_surface(),
+ "Dummy surface string for <unk>. In decoding <unk> is decoded to "
+ "`unk_surface`.");
int main(int argc, char *argv[]) {
sentencepiece::flags::ParseCommandLineFlags(argc, argv);
@@ -125,6 +128,7 @@ int main(int argc, char *argv[]) {
SetTrainerSpecFromFlag(bos_id);
SetTrainerSpecFromFlag(eos_id);
SetTrainerSpecFromFlag(pad_id);
+ SetTrainerSpecFromFlag(unk_surface);
SetRepeatedTrainerSpecFromFlag(input);
SetRepeatedTrainerSpecFromFlag(accept_language);
SetRepeatedTrainerSpecFromFlag(control_symbols);