Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com>, 2018-02-28 13:54:11 +0300
committer: Taku Kudo <taku@google.com>, 2018-02-28 13:54:11 +0300
commit: 45b4527117c5bf52b9bb14c33de9ec7facae9c93 (patch)
tree: 3f185406804145defb1a8622c6347f33cd5931de /src/sentencepiece_trainer_test.cc
parent: c6a1a196651789ba4c0334dbf41d5885b3334b2f (diff)
Added SentencePieceTrainer class
Diffstat (limited to 'src/sentencepiece_trainer_test.cc')
-rw-r--r-- src/sentencepiece_trainer_test.cc 50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
new file mode 100644
index 0000000..0b4c12b
--- /dev/null
+++ b/src/sentencepiece_trainer_test.cc
@@ -0,0 +1,50 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.!
+
+#include "sentencepiece_trainer.h"
+#include "sentencepiece_model.pb.h"
+#include "testharness.h"
+
+namespace sentencepiece {
+namespace {
+
+TEST(SentencePieceTrainerTest, TrainFromArgsTest) {
+ SentencePieceTrainer::Train(
+ "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000");
+ SentencePieceTrainer::Train(
+ "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
+ "--model_type=bpe");
+ SentencePieceTrainer::Train(
+ "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
+ "--model_type=char");
+ SentencePieceTrainer::Train(
+ "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
+ "--model_type=word");
+}
+
+TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) {
+ SentencePieceTrainer::Train(
+ "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
+ "--normalization_rule_tsv=../data/nfkc.tsv");
+}
+
+TEST(SentencePieceTrainerTest, TrainTest) {
+ TrainerSpec trainer_spec;
+ trainer_spec.add_input("../data/botchan.txt");
+ trainer_spec.set_model_prefix("m");
+ trainer_spec.set_vocab_size(1000);
+ SentencePieceTrainer::Train(trainer_spec);
+}
+} // namespace
+} // namespace sentencepiece