github.com/moses-smt/mosesdecoder.git
author     Tetsuo Kiso <tetsuo-s@is.naist.jp>    2012-12-07 00:12:24 +0400
committer  Tetsuo Kiso <tetsuo-s@is.naist.jp>    2012-12-07 00:12:24 +0400
commit     8fdec9bf3059e388b267da06b4d5a0ca67615df7
tree       1a4a8f18824c786dcb1662ee8be3643f606ea4e8  /mert/BleuScorer.cpp
parent     6c04c4ad9c1fa9b184a3ad95c8e7b75bcb5c92c3
Use boost::unordered_map instead of std::map.
The unordered map stores the word vocabulary used in the computation of BLEU scores. This change reduces the running time of the extractor by about 2-3 seconds (a 9% reduction).
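As a rough illustration of the swap described above: the vocabulary amounts to a string-to-id map, and moving it from std::map to boost::unordered_map trades ordered O(log n) lookups for hashed amortized O(1) lookups. The typedef names below are illustrative assumptions, not the actual declarations in mert.

#include <string>
#include <map>
#include <boost/unordered_map.hpp>

// Before: red-black tree; each lookup does O(log n) string comparisons.
typedef std::map<std::string, int> OrderedVocab;             // hypothetical name

// After: hash table; amortized O(1) lookups, which is where the reported
// ~2-3 second (9%) reduction in extractor running time comes from.
typedef boost::unordered_map<std::string, int> HashedVocab;  // hypothetical name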
Diffstat (limited to 'mert/BleuScorer.cpp')
-rw-r--r--  mert/BleuScorer.cpp  16
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index a3ba16b13..8fb814390 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -50,11 +50,21 @@ BleuScorer::BleuScorer(const string& config)
BleuScorer::~BleuScorer() {}
size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
- unsigned int n)
+ unsigned int n, bool is_testing)
{
assert(n > 0);
vector<int> encoded_tokens;
- TokenizeAndEncode(line, encoded_tokens);
+
+ // When performing tokenization of a hypothesis translation, we don't have
+ // to update the Scorer's word vocabulary. However, the tokenization of
+ // reference translations requires modifying the vocabulary, which means
+ this procedure might be slower than the tokenization of the hypothesis
+ // translation.
+ if (is_testing) {
+ TokenizeAndEncodeTesting(line, encoded_tokens);
+ } else {
+ TokenizeAndEncode(line, encoded_tokens);
+ }
for (size_t k = 1; k <= n; ++k) {
//ngram order longer than sentence - no point
if (k > encoded_tokens.size()) {
@@ -147,7 +157,7 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
// stats for this line
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
string sentence = preprocessSentence(text);
- const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);
+ const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder, true);
const int reference_len = CalcReferenceLength(sid, length);
stats.push_back(reference_len);
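The first hunk above hinges on one distinction: encoding a reference translation may add new words to the scorer's vocabulary, while encoding a hypothesis only needs to look existing words up. A minimal, self-contained sketch of that split follows; the function names come from the diff, but the signatures, bodies, and the Vocab type are assumptions for illustration, not the actual mert API.

#include <sstream>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>

// Hypothetical stand-in for the scorer's shared word vocabulary.
typedef boost::unordered_map<std::string, int> Vocab;

// Reference path: unseen words are inserted, so the vocabulary can grow.
void TokenizeAndEncode(const std::string& line, Vocab& vocab,
                       std::vector<int>& encoded) {
  std::istringstream in(line);
  std::string token;
  while (in >> token) {
    std::pair<Vocab::iterator, bool> result =
        vocab.insert(std::make_pair(token, static_cast<int>(vocab.size())));
    encoded.push_back(result.first->second);
  }
}

// Hypothesis ("testing") path: read-only lookups, no vocabulary mutation.
void TokenizeAndEncodeTesting(const std::string& line, const Vocab& vocab,
                              std::vector<int>& encoded) {
  std::istringstream in(line);
  std::string token;
  while (in >> token) {
    Vocab::const_iterator it = vocab.find(token);
    encoded.push_back(it == vocab.end() ? -1 : it->second);  // -1 = unseen word
  }
}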