diff options
author | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 14:53:15 +0400 |
---|---|---|
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 15:12:24 +0400 |
commit | 45630a5851fad3bdd6953e88727e4b7c8d0c4c18 (patch) | |
tree | cd95a3175eb7fc4bb1027cf92d2fa880112b3fee /moses/Incremental.cpp | |
parent | 1c6061e78174d09ea4a7a8125ee0df7f41d88ae5 (diff) |
various optimizations to make CYK+ parser several times faster and eat less memory.
speed-up of decoding depends on how much time is spent in parser:
10-50% speed-up for string-to-tree systems observed (more on long sentences and with high max-chart-span).
if you only use hiero or string-to-tree models (but none with source syntax), use compile-option --unlabelled-source for (small) efficiency gains.
Diffstat (limited to 'moses/Incremental.cpp')
-rw-r--r-- | moses/Incremental.cpp | 28 |
1 file changed, 24 insertions, 4 deletions
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 5cce4614b..2ee98aade 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -50,7 +50,10 @@ public: void FinishedSearch() { for (ChartCellLabelSet::iterator i(out_.mutable_begin()); i != out_.mutable_end(); ++i) { - ChartCellLabel::Stack &stack = i->second.MutableStack(); + if ((*i) == NULL) { + continue; + } + ChartCellLabel::Stack &stack = (*i)->MutableStack(); Gen *gen = static_cast<Gen*>(stack.incr_generator); gen->FinishedSearch(); stack.incr = &gen->Generating(); @@ -80,6 +83,8 @@ public: void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range); + float CalcEstimateOfBestScore(const TargetPhraseCollection & tpc, const StackVec & stackVec) const; + bool Empty() const { return edges_.Empty(); } @@ -112,7 +117,7 @@ private: const search::Score oov_weight_; }; -template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &) +template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &range) { std::vector<search::PartialVertex> vertices; vertices.reserve(nts.size()); @@ -173,6 +178,17 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std: edges_.AddEdge(edge); } +// for early pruning +template <class Model> float Fill<Model>::CalcEstimateOfBestScore(const TargetPhraseCollection &targets, const StackVec &nts) const +{ + float below_score = 0.0; + for (StackVec::const_iterator i = nts.begin(); i != nts.end(); ++i) { + below_score += (*i)->GetStack().incr->RootAlternate().Bound(); + } + const TargetPhrase &targetPhrase = **(targets.begin()); + return targetPhrase.GetFutureScore() + below_score; +} + // TODO: factors (but chart doesn't seem to support factors anyway). 
template <class Model> lm::WordIndex Fill<Model>::Convert(const Word &word) const { @@ -209,8 +225,12 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M size_t size = source_.GetSize(); boost::object_pool<search::Vertex> vertex_pool(std::max<size_t>(size * size / 2, 32)); - for (size_t width = 1; width < size; ++width) { - for (size_t startPos = 0; startPos <= size-width; ++startPos) { + for (int startPos = size-1; startPos >= 0; --startPos) { + for (size_t width = 1; width <= size-startPos; ++width) { + // full range uses RootSearch + if (startPos == 0 && startPos + width == size) { + break; + } WordsRange range(startPos, startPos + width - 1); Fill<Model> filler(context, words, oov_weight); parser_.Create(range, filler); |