Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/unigram_model_trainer.cc')
-rw-r--r-- src/unigram_model_trainer.cc 4
1 files changed, 4 insertions, 0 deletions
diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc
index 5f26771..e5dc8c0 100644
--- a/src/unigram_model_trainer.cc
+++ b/src/unigram_model_trainer.cc
@@ -121,7 +121,11 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const {
}
}
+ CHECK_LE(array.size(),
+ static_cast<size_t>(std::numeric_limits<node_int_type>::max()))
+ << "Input corpus too large, try with train_extremely_large_corpus=true";
const node_int_type n = array.size();
+
std::vector<node_int_type> SA(n); // suffix array
std::vector<node_int_type> L(n); // left boundaries of internal node
std::vector<node_int_type> R(n); // right boundaries of internal node