diff options
Diffstat (limited to 'src/unigram_model_trainer.cc')
-rw-r--r-- | src/unigram_model_trainer.cc | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc index 5f26771..e5dc8c0 100644 --- a/src/unigram_model_trainer.cc +++ b/src/unigram_model_trainer.cc @@ -121,7 +121,11 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { } } + CHECK_LE(array.size(), + static_cast<size_t>(std::numeric_limits<node_int_type>::max())) + << "Input corpus too large, try with train_extremely_large_corpus=true"; const node_int_type n = array.size(); + std::vector<node_int_type> SA(n); // suffix array std::vector<node_int_type> L(n); // left boundaries of internal node std::vector<node_int_type> R(n); // right boundaries of internal node |