From 55bf63ddd73978b46e19f3d8c5606d5677cf560f Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Thu, 27 Aug 2015 14:12:52 +0100 Subject: speed up createMmap --- src/createMmap.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/createMmap.cpp b/src/createMmap.cpp index 408d5fe..16a523f 100644 --- a/src/createMmap.cpp +++ b/src/createMmap.cpp @@ -70,24 +70,28 @@ void writeMmap(const string &filename_input, ifstream training(filename_input.c_str()); data_size_t i = 0; std::string line; - std::vector ngram; + std::string delimiters = " \t"; while (std::getline(training, line)) { if ((i%10000000)==0) { std::cerr<at(i*ngram_size+j) = (int)strtol(line.data() + startPos, NULL, 10); + j++; + startPos = line.find_first_not_of(delimiters, endPos); + } + if (j != ngram_size) { - std::cerr << "Error: expected " << ngram_size << " fields in instance, found " << ngram.size() << std::endl; + std::cerr << "Error: expected " << ngram_size << " fields in instance, found " << j << std::endl; std::exit(-1); } - for (int j=0; jat(i*ngram_size+j) = boost::lexical_cast(ngram[j]); - } - ++i; } -- cgit v1.2.3