From ac045a11c1f0e1d4ae25f0db47061113fc3ae2e1 Mon Sep 17 00:00:00 2001 From: Tetsuo Kiso Date: Thu, 6 Dec 2012 22:08:33 +0900 Subject: Speed up N-gram counts when running extractor. By replacing std::map with boost::unordered_map. Runtime of extractor on 100-best lists of 2679 sentences: Before: real 0m35.314s user 0m34.030s sys 0m1.280s After: real 0m26.729s user 0m25.420s sys 0m1.310s --- mert/BleuScorer.cpp | 1 + mert/Ngram.h | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp index ba662680c..a3ba16b13 100644 --- a/mert/BleuScorer.cpp +++ b/mert/BleuScorer.cpp @@ -62,6 +62,7 @@ size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts, } for (size_t i = 0; i < encoded_tokens.size()-k+1; ++i) { vector ngram; + ngram.reserve(encoded_tokens.size()); for (size_t j = i; j < i+k && j < encoded_tokens.size(); ++j) { ngram.push_back(encoded_tokens[j]); } diff --git a/mert/Ngram.h b/mert/Ngram.h index d3e8041a3..811f21f27 100644 --- a/mert/Ngram.h +++ b/mert/Ngram.h @@ -2,12 +2,13 @@ #define MERT_NGRAM_H_ #include -#include #include +#include + namespace MosesTuning { - + /** A simple STL-std::map based n-gram counts. Basically, we provide * typical accessors and mutaors, but we intentionally does not allow * erasing elements. @@ -35,8 +36,8 @@ class NgramCounts { typedef std::vector Key; typedef int Value; - typedef std::map::iterator iterator; - typedef std::map::const_iterator const_iterator; + typedef boost::unordered_map::iterator iterator; + typedef boost::unordered_map::const_iterator const_iterator; NgramCounts() : kDefaultCount(1) { } virtual ~NgramCounts() { } @@ -95,7 +96,7 @@ class NgramCounts { private: const int kDefaultCount; - std::map m_counts; + boost::unordered_map m_counts; }; } -- cgit v1.2.3