Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tetsuo Kiso <tetsuo-s@is.naist.jp> 2012-12-07 03:45:47 +0400
committer: Tetsuo Kiso <tetsuo-s@is.naist.jp> 2012-12-07 03:45:47 +0400
commit: 2a3c9fc6797c97595f5f6bda7dab0b17ce758afc (patch)
tree: 3f4f28a35bd8dd6cec7d18eb0e185bf8950b3172 /mert/BleuScorer.cpp
parent: 8fdec9bf3059e388b267da06b4d5a0ca67615df7 (diff)
Further optimization for extractor.
Fixes inefficient updating of N-gram counts. NOTE: Using the '--binary' option (which is not yet enabled by default) for saving outputs would lead to a significant speedup.
Diffstat (limited to 'mert/BleuScorer.cpp')
-rw-r--r-- mert/BleuScorer.cpp 15
1 files changed, 9 insertions, 6 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 8fb814390..1adbd0276 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -65,21 +65,24 @@ size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
} else {
TokenizeAndEncode(line, encoded_tokens);
}
+ const size_t len = encoded_tokens.size();
+ vector<int> ngram;
+
for (size_t k = 1; k <= n; ++k) {
//ngram order longer than sentence - no point
- if (k > encoded_tokens.size()) {
+ if (k > len) {
continue;
}
- for (size_t i = 0; i < encoded_tokens.size()-k+1; ++i) {
- vector<int> ngram;
- ngram.reserve(encoded_tokens.size());
- for (size_t j = i; j < i+k && j < encoded_tokens.size(); ++j) {
+ for (size_t i = 0; i < len - k + 1; ++i) {
+ ngram.clear();
+ ngram.reserve(len);
+ for (size_t j = i; j < i+k && j < len; ++j) {
ngram.push_back(encoded_tokens[j]);
}
counts.Add(ngram);
}
}
- return encoded_tokens.size();
+ return len;
}
void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)