diff options
author | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 14:53:15 +0400 |
---|---|---|
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 15:12:24 +0400 |
commit | 45630a5851fad3bdd6953e88727e4b7c8d0c4c18 (patch) | |
tree | cd95a3175eb7fc4bb1027cf92d2fa880112b3fee /moses/Incremental.cpp | |
parent | 1c6061e78174d09ea4a7a8125ee0df7f41d88ae5 (diff) |
various optimizations to make CYK+ parser several times faster and eat less memory.
speed-up of decoding depends on how much time is spent in parser:
10-50% speed-up for string-to-tree systems observed (more on long sentences and with high max-chart-span).
if you only use hiero or string-to-tree models (but none with source syntax), use compile-option --unlabelled-source for (small) efficiency gains.
Diffstat (limited to 'moses/Incremental.cpp')
-rw-r--r-- | moses/Incremental.cpp | 28 |
1 file changed, 24 insertions, 4 deletions
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 5cce4614b..2ee98aade 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -50,7 +50,10 @@ public: void FinishedSearch() { for (ChartCellLabelSet::iterator i(out_.mutable_begin()); i != out_.mutable_end(); ++i) { - ChartCellLabel::Stack &stack = i->second.MutableStack(); + if ((*i) == NULL) { + continue; + } + ChartCellLabel::Stack &stack = (*i)->MutableStack(); Gen *gen = static_cast<Gen*>(stack.incr_generator); gen->FinishedSearch(); stack.incr = &gen->Generating(); @@ -80,6 +83,8 @@ public: void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range); + float CalcEstimateOfBestScore(const TargetPhraseCollection & tpc, const StackVec & stackVec) const; + bool Empty() const { return edges_.Empty(); } @@ -112,7 +117,7 @@ private: const search::Score oov_weight_; }; -template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &) +template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &range) { std::vector<search::PartialVertex> vertices; vertices.reserve(nts.size()); @@ -173,6 +178,17 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std: edges_.AddEdge(edge); } +// for early pruning +template <class Model> float Fill<Model>::CalcEstimateOfBestScore(const TargetPhraseCollection &targets, const StackVec &nts) const +{ + float below_score = 0.0; + for (StackVec::const_iterator i = nts.begin(); i != nts.end(); ++i) { + below_score += (*i)->GetStack().incr->RootAlternate().Bound(); + } + const TargetPhrase &targetPhrase = **(targets.begin()); + return targetPhrase.GetFutureScore() + below_score; +} + // TODO: factors (but chart doesn't seem to support factors anyway). 
template <class Model> lm::WordIndex Fill<Model>::Convert(const Word &word) const { @@ -209,8 +225,12 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M size_t size = source_.GetSize(); boost::object_pool<search::Vertex> vertex_pool(std::max<size_t>(size * size / 2, 32)); - for (size_t width = 1; width < size; ++width) { - for (size_t startPos = 0; startPos <= size-width; ++startPos) { + for (int startPos = size-1; startPos >= 0; --startPos) { + for (size_t width = 1; width <= size-startPos; ++width) { + // full range uses RootSearch + if (startPos == 0 && startPos + width == size) { + break; + } WordsRange range(startPos, startPos + width - 1); Fill<Model> filler(context, words, oov_weight); parser_.Create(range, filler); |