Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-04-09 11:47:42 +0300
committer: Taku Kudo <taku@google.com> 2018-04-09 11:47:42 +0300
commit: d1028974960d9e7ac9b408f6c212aa90d7c958cb (patch)
tree: 4cda91a55a068786d91e6d78afb294b494fd9e3c /src/model_interface.cc
parent: 8ff70f28bd33368af3a9d7c74b672a1d9bb01095 (diff)
Support to change ids of <unk>, <s>, </s>
Diffstat (limited to 'src/model_interface.cc')
-rw-r--r-- src/model_interface.cc 21
1 file changed, 1 insertion, 20 deletions
diff --git a/src/model_interface.cc b/src/model_interface.cc
index 05c25d5..d4602ea 100644
--- a/src/model_interface.cc
+++ b/src/model_interface.cc
@@ -18,8 +18,6 @@
namespace sentencepiece {
-const uint32 ModelInterface::kUnkID = 0;
-
ModelInterface::ModelInterface(const ModelProto &model_proto)
: model_proto_(&model_proto) {}
ModelInterface::~ModelInterface() {}
@@ -33,7 +31,7 @@ int ModelInterface::PieceToId(StringPiece piece) const {
if (it2 != pieces_.end()) {
return it2->second;
}
- return kUnkID;
+ return unk_id_;
}
int ModelInterface::GetPieceSize() const {
@@ -58,23 +56,6 @@ bool ModelInterface::IsUnknown(int id) const {
ModelProto::SentencePiece::UNKNOWN);
}
-void ModelInterface::CheckControlSymbols() const {
- CHECK_NOTNULL(model_proto_);
-
- CHECK_GE(model_proto_->pieces_size(), 3); // <unk>, <s>, </s>
-
- // Verify reserved control symbols and unknon symbol.
- CHECK_EQ(ModelProto::SentencePiece::UNKNOWN, // <unk>
- model_proto_->pieces(0).type());
- CHECK_EQ("<unk>", model_proto_->pieces(0).piece());
- CHECK_EQ(ModelProto::SentencePiece::CONTROL, // <s>
- model_proto_->pieces(1).type());
- CHECK_EQ("<s>", model_proto_->pieces(1).piece());
- CHECK_EQ(ModelProto::SentencePiece::CONTROL, // </s>
- model_proto_->pieces(2).type());
- CHECK_EQ("</s>", model_proto_->pieces(2).piece());
-}
-
std::vector<StringPiece> SplitIntoWords(StringPiece text) {
const char *begin = text.data();
const char *end = text.data() + text.size();