ChartKBestExtractor: fix memory leak, clean-up code

author: Phil Williams <philip.williams@mac.com> 2014-04-19 13:29:41 +0400
committer: Phil Williams <philip.williams@mac.com> 2014-04-19 13:29:41 +0400
commit: 568685cb66287dc0af72315df5095567a1854853 (patch)
tree: a2ca524360f66149f8cd6b93e5c5176dffa7632a /moses/ChartKBestExtractor.cpp
parent: 00a2bd395aa0f8e5b9d36dfce3651ecf55cbf234 (diff)
1 files changed, 88 insertions, 66 deletions
diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp
index 05f8920c5..72a894ba7 100644
--- a/moses/ChartKBestExtractor.cpp
+++ b/moses/ChartKBestExtractor.cpp
@@ -32,52 +32,48 @@ namespace Moses
 
 // Extract the k-best list from the search graph.
 void ChartKBestExtractor::Extract(
-    const std::vector<const ChartHypothesis*> &topHypos, std::size_t k,
+    const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
     KBestVec &kBestList)
 {
-  typedef std::vector<const ChartHypothesis*> HypoVec;
-
   kBestList.clear();
-  if (topHypos.empty()) {
+  if (topLevelHypos.empty()) {
     return;
   }
 
-  // Create a new top-level ChartHypothesis that has the best hypothesis as its
-  // predecessor.  This is the search hypergraph's target vertex.
-  HypoVec::const_iterator iter = topHypos.begin();
+  // Create a new ChartHypothesis object, supremeHypo, that has the best
+  // top-level hypothesis as its predecessor and has the same score.
+  std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
+  const ChartHypothesis &bestTopLevelHypo = **p;
   boost::scoped_ptr<ChartHypothesis> supremeHypo(
-    new ChartHypothesis(**iter, *this));
+      new ChartHypothesis(bestTopLevelHypo, *this));
 
   // Do the same for each alternative top-level hypothesis, but add the new
   // ChartHypothesis objects as arcs from supremeHypo, as if they had been
   // recombined.
-  float prevScore = (*iter)->GetTotalScore();
-  for (++iter; iter != topHypos.end(); ++iter) {
-    // Check that the first item in topHypos really was the best.
-    UTIL_THROW_IF2((*iter)->GetTotalScore() <= prevScore,
-                   "top-level vertices are not correctly sorted");
+  for (++p; p != topLevelHypos.end(); ++p) {
+    // Check that the first item in topLevelHypos really was the best.
+    UTIL_THROW_IF2((*p)->GetTotalScore() <= bestTopLevelHypo.GetTotalScore(),
+                   "top-level hypotheses are not correctly sorted");
     // Note: there's no need for a smart pointer here: supremeHypo will take
     // ownership of altHypo.
-    ChartHypothesis *altHypo = new ChartHypothesis(**iter, *this);
+    ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
     supremeHypo->AddArc(altHypo);
   }
 
-  // Create the target vertex corresponding to supremeHypo then generate
-  // it's k-best list.
-  boost::shared_ptr<Vertex> top = FindOrCreateVertex(*supremeHypo);
-  LazyKthBest(*top, k, k);
+  // Create the target vertex then lazily fill its k-best list.
+  boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
+  LazyKthBest(*targetVertex, k, k);
 
   // Copy the k-best list from the target vertex, but drop the top edge from
   // each derivation.
-  kBestList.reserve(top->kBestList.size());
-  for (KBestVec::const_iterator p = top->kBestList.begin();
-       p != top->kBestList.end(); ++p) {
-    const Derivation &d = **p;
-    assert(d.edge.tail.size() == 1);  // d should have exactly one predecessor.
-    assert(d.backPointers.size() == 1);
-    std::size_t i = d.backPointers[0];
-    boost::shared_ptr<Derivation> pred = d.edge.tail[0]->kBestList[i];
-    kBestList.push_back(pred);
+  kBestList.reserve(targetVertex->kBestList.size());
+  for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
+        q = targetVertex->kBestList.begin();
+        q != targetVertex->kBestList.end(); ++q) {
+    const boost::shared_ptr<Derivation> d(*q);
+    assert(d);
+    assert(d->subderivations.size() == 1);
+    kBestList.push_back(d->subderivations[0]);
   }
 }
 
@@ -96,8 +92,7 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
     const Word &word = phrase.GetWord(pos);
     if (word.IsNonTerminal()) {
       std::size_t nonTermInd = nonTermIndexMap[pos];
-      const Derivation &subderivation =
-        *d.edge.tail[nonTermInd]->kBestList[d.backPointers[nonTermInd]];
+      const Derivation &subderivation = *d.subderivations[nonTermInd];
       Phrase subPhrase = GetOutputPhrase(subderivation);
       ret.Append(subPhrase);
     } else {
@@ -142,26 +137,6 @@ ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
   return edge;
 }
 
-void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
-{
-  // Create a derivation for v's best incoming edge.
-  UnweightedHyperarc bestEdge = CreateEdge(v.hypothesis);
-  boost::shared_ptr<Derivation> d(new Derivation(bestEdge));
-  v.candidates.push(d);
-  v.seen.insert(d);
-  // Create derivations for the rest of v's incoming edges.
-  const ChartArcList *arcList = v.hypothesis.GetArcList();
-  if (arcList) {
-    for (std::size_t i = 0; i < arcList->size(); ++i) {
-      const ChartHypothesis &recombinedHypo = *(*arcList)[i];
-      UnweightedHyperarc edge = CreateEdge(recombinedHypo);
-      boost::shared_ptr<Derivation> d(new Derivation(edge));
-      v.candidates.push(d);
-      v.seen.insert(d);
-    }
-  }
-}
-
 // Look for the vertex corresponding to a given ChartHypothesis, creating
 // a new one if necessary.
 boost::shared_ptr<ChartKBestExtractor::Vertex>
@@ -174,14 +149,51 @@ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
     return sp;  // Vertex was already in m_vertexMap.
   }
   sp.reset(new Vertex(h));
+  // Create the 1-best derivation and add it to the vertex's kBestList.
+  UnweightedHyperarc bestEdge;
+  bestEdge.head = sp;
+  const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
+  bestEdge.tail.resize(prevHypos.size());
+  for (std::size_t i = 0; i < prevHypos.size(); ++i) {
+    const ChartHypothesis *prevHypo = prevHypos[i];
+    bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
+  }
+  boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
+  std::pair<DerivationSet::iterator, bool> q =
+    m_derivations.insert(bestDerivation);
+  assert(q.second);
+  sp->kBestList.push_back(bestDerivation);
   return sp;
 }
 
+// Create the 1-best derivation for each edge in BS(v) (except the best one)
+// and add it to v's candidate queue.
+void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
+{
+  // Create derivations for all of v's incoming edges except the best.  This
+  // means everything in v.hypothesis.GetArcList() and not the edge defined
+  // by v.hypothesis itself.  The 1-best derivation for that edge will already
+  // have been created.
+  const ChartArcList *arcList = v.hypothesis.GetArcList();
+  if (arcList) {
+    for (std::size_t i = 0; i < arcList->size(); ++i) {
+      const ChartHypothesis &recombinedHypo = *(*arcList)[i];
+      boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
+      assert(w->kBestList.size() == 1);
+      v.candidates.push(w->kBestList[0]);
+    }
+  }
+}
+
+// Lazily fill v's k-best list.
 void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
                                       std::size_t globalK)
 {
   // If this is the first visit to vertex v then initialize the priority queue.
   if (v.visited == false) {
+    // The 1-best derivation should already be in v's k-best list.
+    assert(v.kBestList.size() == 1);
+    // Initialize v's priority queue.
     GetCandidates(v, globalK);
     v.visited = true;
   }
@@ -191,49 +203,57 @@ void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
     if (!v.kBestList.empty()) {
       // Update the priority queue by adding the successors of the last
       // derivation (unless they've been seen before).
-      const Derivation &d = *v.kBestList.back();
-      LazyNext(v, d, globalK);
+      boost::shared_ptr<Derivation> d(v.kBestList.back());
+      LazyNext(v, *d, globalK);
     }
     // Check if there are any derivations left in the queue.
     if (v.candidates.empty()) {
       break;
     }
     // Get the next best derivation and delete it from the queue.
-    boost::shared_ptr<Derivation> d = v.candidates.top();
+    boost::weak_ptr<Derivation> d = v.candidates.top();
     v.candidates.pop();
     // Add it to the k-best list.
     v.kBestList.push_back(d);
   }
 }
 
+// Create the neighbours of Derivation d and add them to v's candidate queue.
 void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
                                    std::size_t globalK)
 {
-  // Create the neighbours of Derivation d.
-  for (std::size_t i = 0; i < d.backPointers.size(); ++i) {
-    Vertex &predVertex = *d.edge.tail[i];
-    // Ensure that predVertex's k-best list contains enough derivations.
+  for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
+    Vertex &pred = *d.edge.tail[i];
+    // Ensure that pred's k-best list contains enough derivations.
     std::size_t k = d.backPointers[i] + 2;
-    LazyKthBest(predVertex, k, globalK);
-    if (predVertex.kBestList.size() < k) {
-      // predVertex's derivations have been exhausted.
+    LazyKthBest(pred, k, globalK);
+    if (pred.kBestList.size() < k) {
+      // pred's derivations have been exhausted.
       continue;
     }
     // Create the neighbour.
     boost::shared_ptr<Derivation> next(new Derivation(d, i));
     // Check if it has been created before.
-    std::pair<Vertex::DerivationSet::iterator, bool> p = v.seen.insert(next);
+    std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
     if (p.second) {
       v.candidates.push(next);  // Haven't previously seen it.
     }
   }
 }
 
-// Construct a Derivation corresponding to a ChartHypothesis.
+// Construct the 1-best Derivation that ends at edge e.
 ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
 {
   edge = e;
-  backPointers.resize(edge.tail.size(), 0);
+  std::size_t arity = edge.tail.size();
+  backPointers.resize(arity, 0);
+  subderivations.reserve(arity);
+  for (std::size_t i = 0; i < arity; ++i) {
+    const Vertex &pred = *edge.tail[i];
+    assert(pred.kBestList.size() == 1);
+    boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
+    subderivations.push_back(sub);
+  }
   scoreBreakdown = edge.head->hypothesis.GetScoreBreakdown();
   score = edge.head->hypothesis.GetTotalScore();
 }
@@ -244,14 +264,16 @@ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
   edge.head = d.edge.head;
   edge.tail = d.edge.tail;
   backPointers = d.backPointers;
+  subderivations = d.subderivations;
   std::size_t j = ++backPointers[i];
   scoreBreakdown = d.scoreBreakdown;
   // Deduct the score of the old subderivation.
-  const Derivation &oldSubderivation = *(edge.tail[i]->kBestList[j-1]);
-  scoreBreakdown.MinusEquals(oldSubderivation.scoreBreakdown);
+  scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
+  // Update the subderivation pointer.
+  boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
+  subderivations[i] = newSub;
   // Add the score of the new subderivation.
-  const Derivation &newSubderivation = *(edge.tail[i]->kBestList[j]);
-  scoreBreakdown.PlusEquals(newSubderivation.scoreBreakdown);
+  scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
   score = scoreBreakdown.GetWeightedScore();
 }
author	Phil Williams <philip.williams@mac.com>	2014-04-19 13:29:41 +0400
committer	Phil Williams <philip.williams@mac.com>	2014-04-19 13:29:41 +0400
commit	568685cb66287dc0af72315df5095567a1854853 (patch)
tree	a2ca524360f66149f8cd6b93e5c5176dffa7632a /moses/ChartKBestExtractor.cpp
parent	00a2bd395aa0f8e5b9d36dfce3651ecf55cbf234 (diff)