diff options
author | Taku Kudo <taku@google.com> | 2018-04-09 11:47:42 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-04-09 11:47:42 +0300 |
commit | d1028974960d9e7ac9b408f6c212aa90d7c958cb (patch) | |
tree | 4cda91a55a068786d91e6d78afb294b494fd9e3c /src/model_interface.cc | |
parent | 8ff70f28bd33368af3a9d7c74b672a1d9bb01095 (diff) |
Support to change ids of <unk>, <s>, </s>
Diffstat (limited to 'src/model_interface.cc')
-rw-r--r-- | src/model_interface.cc | 21 |
1 file changed, 1 insertion, 20 deletions
diff --git a/src/model_interface.cc b/src/model_interface.cc index 05c25d5..d4602ea 100644 --- a/src/model_interface.cc +++ b/src/model_interface.cc @@ -18,8 +18,6 @@ namespace sentencepiece { -const uint32 ModelInterface::kUnkID = 0; - ModelInterface::ModelInterface(const ModelProto &model_proto) : model_proto_(&model_proto) {} ModelInterface::~ModelInterface() {} @@ -33,7 +31,7 @@ int ModelInterface::PieceToId(StringPiece piece) const { if (it2 != pieces_.end()) { return it2->second; } - return kUnkID; + return unk_id_; } int ModelInterface::GetPieceSize() const { @@ -58,23 +56,6 @@ bool ModelInterface::IsUnknown(int id) const { ModelProto::SentencePiece::UNKNOWN); } -void ModelInterface::CheckControlSymbols() const { - CHECK_NOTNULL(model_proto_); - - CHECK_GE(model_proto_->pieces_size(), 3); // <unk>, <s>, </s> - - // Verify reserved control symbols and unknon symbol. - CHECK_EQ(ModelProto::SentencePiece::UNKNOWN, // <unk> - model_proto_->pieces(0).type()); - CHECK_EQ("<unk>", model_proto_->pieces(0).piece()); - CHECK_EQ(ModelProto::SentencePiece::CONTROL, // <s> - model_proto_->pieces(1).type()); - CHECK_EQ("<s>", model_proto_->pieces(1).piece()); - CHECK_EQ(ModelProto::SentencePiece::CONTROL, // </s> - model_proto_->pieces(2).type()); - CHECK_EQ("</s>", model_proto_->pieces(2).piece()); -} - std::vector<StringPiece> SplitIntoWords(StringPiece text) { const char *begin = text.data(); const char *end = text.data() + text.size(); |