Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-04-28 20:50:07 +0300
committer: Taku Kudo <taku@google.com> 2018-04-28 20:50:07 +0300
commit: d16531bfb866e2fca246a36316876b934aa427f7 (patch)
tree: 0215e1b3555b02363b17d425b3c94200d92cb6fd /src/bpe_model.cc
parent: baf5d7a2995018ede996173cdf0febcdf23cba2d (diff)
Uses util::Status to propagate error messages
Diffstat (limited to 'src/bpe_model.cc')
-rw-r--r-- src/bpe_model.cc 23
1 files changed, 6 insertions, 17 deletions
diff --git a/src/bpe_model.cc b/src/bpe_model.cc
index fe9df73..a23ad85 100644
--- a/src/bpe_model.cc
+++ b/src/bpe_model.cc
@@ -22,28 +22,14 @@ namespace bpe {
Model::Model(const ModelProto &model_proto) {
model_proto_ = &model_proto;
-
- for (int i = 0; i < model_proto_->pieces_size(); ++i) {
- const auto &sp = model_proto_->pieces(i);
- CHECK(!sp.piece().empty());
- if (sp.type() == ModelProto::SentencePiece::NORMAL) {
- CHECK(sp.has_score());
- port::InsertOrDie(&pieces_, sp.piece(), i);
- } else if (sp.type() == ModelProto::SentencePiece::USER_DEFINED) {
- // TODO(taku): implement USER_DEFINED symbol.
- LOG(FATAL) << "User defined symbol is not supported in BPE";
- } else {
- port::InsertOrDie(&reserved_id_map_, sp.piece(), i);
- if (sp.type() == ModelProto::SentencePiece::UNKNOWN) unk_id_ = i;
- }
- }
+ InitializePieces(false /* use_user_defined */);
}
Model::~Model() {}
std::vector<std::pair<StringPiece, int>> Model::Encode(
StringPiece normalized) const {
- if (normalized.empty()) {
+ if (!status().ok() || normalized.empty()) {
return {};
}
@@ -110,7 +96,10 @@ std::vector<std::pair<StringPiece, int>> Model::Encode(
++index;
symbols.emplace_back(s);
}
- CHECK(!symbols.empty());
+
+ if (symbols.empty()) {
+ return {};
+ }
// Lookup all bigrams.
for (size_t i = 1; i < symbols.size(); ++i) {