diff options
author | Taku Kudo <taku@google.com> | 2017-03-07 13:43:50 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2017-03-07 13:43:50 +0300 |
commit | 2928ce5307224ea4c012fc6cbd7a098c486590b6 (patch) | |
tree | 38b679886855a7a6b80fdc61f2f62c952cf3bfb7 /src/word_model_trainer.h |
Initialize repository
Diffstat (limited to 'src/word_model_trainer.h')
-rw-r--r-- | src/word_model_trainer.h | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/src/word_model_trainer.h b/src/word_model_trainer.h new file mode 100644 index 0000000..672c5ac --- /dev/null +++ b/src/word_model_trainer.h @@ -0,0 +1,39 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.! + +#ifndef WORD_MODEL_TRAINER_H_ +#define WORD_MODEL_TRAINER_H_ + +#include "sentencepiece_model.pb.h" +#include "trainer_interface.h" + +namespace sentencepiece { +namespace word { + +// Trainer class for word model. +// +// Word model simply counts the frequency of +// space-delimited tokens, then keep top +// |vocab_size| frequent tokens. +class Trainer : public TrainerInterface { + public: + Trainer(const TrainerSpec &trainer_spec, + const NormalizerSpec &normalizer_spec) + : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec) {} + + void Train() override; +}; +} // namespace word +} // namespace sentencepiece +#endif // WORD_MODEL_TRAINER_H_ |