Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/nplm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rico Sennrich <rico.sennrich@gmx.ch>, 2015-08-27 16:12:52 +0300
committer: Rico Sennrich <rico.sennrich@gmx.ch>, 2015-08-27 16:12:52 +0300
commit: 55bf63ddd73978b46e19f3d8c5606d5677cf560f (patch)
tree: 939ef531239b69f2fbcc2558cd58af607eac6d50
parent: 0612a2f8cc195f0867d5aaa8b8ef05039af3970c (diff)
speed up createMmap
-rw-r--r-- src/createMmap.cpp | 20
1 files changed, 12 insertions, 8 deletions
diff --git a/src/createMmap.cpp b/src/createMmap.cpp
index 408d5fe..16a523f 100644
--- a/src/createMmap.cpp
+++ b/src/createMmap.cpp
@@ -70,24 +70,28 @@ void writeMmap(const string &filename_input,
ifstream training(filename_input.c_str());
data_size_t i = 0;
std::string line;
- std::vector<std::string> ngram;
+ std::string delimiters = " \t";
while (std::getline(training, line)) {
if ((i%10000000)==0) {
std::cerr<<i<<"...";
}
- splitBySpace(line, ngram);
- if (ngram.size() != ngram_size)
+ std::string::size_type startPos = line.find_first_not_of(delimiters, 0);
+ std::string::size_type endPos;
+ size_t j = 0;
+ while (std::string::npos != startPos) {
+ endPos = line.find_first_of(delimiters, startPos);
+ mMapVec->at(i*ngram_size+j) = (int)strtol(line.data() + startPos, NULL, 10);
+ j++;
+ startPos = line.find_first_not_of(delimiters, endPos);
+ }
+ if (j != ngram_size)
{
- std::cerr << "Error: expected " << ngram_size << " fields in instance, found " << ngram.size() << std::endl;
+ std::cerr << "Error: expected " << ngram_size << " fields in instance, found " << j << std::endl;
std::exit(-1);
}
- for (int j=0; j<ngram_size; j++) {
- mMapVec->at(i*ngram_size+j) = boost::lexical_cast<int>(ngram[j]);
- }
-
++i;
}