1 files changed, 496 insertions, 0 deletions
diff --git a/moses/src/BitmapContainer.cpp b/moses/src/BitmapContainer.cpp
new file mode 100644
index 000000000..790550ba9
--- /dev/null
+++ b/moses/src/BitmapContainer.cpp
@@ -0,0 +1,496 @@
+// $Id: BitmapContainer.cpp 1924 2008-10-30 18:43:17Z hieuhoang1972 $
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <algorithm>
+#include <limits>
+#include <utility>
+
+#include "BitmapContainer.h"
+#include "HypothesisStackCubePruning.h"
+#include "DummyScoreProducers.h"
+#include "TranslationOptionList.h"
+
+namespace Moses
+{
+
+class HypothesisScoreOrdererNoDistortion
+{
+	public:
+		bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+		{
+			const float scoreA = hypoA->GetScore();
+			const float scoreB = hypoB->GetScore();
+			
+			if (scoreA > scoreB)
+			{
+				return true;
+			}
+			else if (scoreA < scoreB)
+			{
+				return false;
+			}
+			else
+			{
+				return hypoA < hypoB;
+			}
+		}
+};
+
+class HypothesisScoreOrdererWithDistortion
+{
+	public:
+		static const WordsRange *transOptRange; // TODO. HACK!!
+
+		bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
+		{
+			assert (transOptRange != NULL);
+
+			const float weightDistortion = StaticData::Instance().GetWeightDistortion();
+			const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
+			const float distortionScoreA = dsp->CalculateDistortionScore(
+										hypoA->GetCurrSourceWordsRange(),
+										*transOptRange,
+										hypoA->GetWordsBitmap().GetFirstGapPos()
+									 );
+			const float distortionScoreB = dsp->CalculateDistortionScore(
+										hypoB->GetCurrSourceWordsRange(),
+										*transOptRange,
+										hypoB->GetWordsBitmap().GetFirstGapPos()
+									 );
+
+			const float scoreA = hypoA->GetScore() + distortionScoreA * weightDistortion;
+			const float scoreB = hypoB->GetScore() + distortionScoreB * weightDistortion;
+			
+			if (scoreA > scoreB)
+			{
+				return true;
+			}
+			else if (scoreA < scoreB)
+			{
+				return false;
+			}
+			else
+			{
+				return hypoA < hypoB;
+			}
+		}
+
+};
+
+const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL;
+
+////////////////////////////////////////////////////////////////////////////////
+// BackwardsEdge Code
+////////////////////////////////////////////////////////////////////////////////
+
+BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
+							 , BitmapContainer &parent
+							 , const TranslationOptionList &translations
+							 , const SquareMatrix &futureScore)
+  : m_initialized(false)
+  , m_prevBitmapContainer(prevBitmapContainer)
+  , m_parent(parent)
+  , m_translations(translations)
+  , m_futurescore(futureScore)
+  , m_seenPosition()
+{
+
+	// If either dimension is empty, we haven't got anything to do.
+	if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) {
+		VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl);
+		return;
+	}
+
+	// Fetch the things we need for distortion cost computation.
+	int maxDistortion = StaticData::Instance().GetMaxDistortion();
+
+	if (maxDistortion == -1) {
+		for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter)
+		{
+			m_hypotheses.push_back(*iter);
+		}
+		return;
+	}
+
+	const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
+	const InputType *itype = StaticData::Instance().GetInput();
+
+	HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
+	HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
+
+	while (iterHypo != iterEnd)
+	{
+		const Hypothesis &hypo = **iterHypo;
+		// Special case: If this is the first hypothesis used to seed the search,
+		// it doesn't have a valid range, and we create the hypothesis, if the
+		// initial position is not further into the sentence than the distortion limit.
+		if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0)
+			{
+				if (transOptRange.GetStartPos() <= maxDistortion)
+					m_hypotheses.push_back(&hypo);
+			}
+		else
+			{
+				int distortionDistance = itype->ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
+																		, transOptRange);
+
+				if (distortionDistance <= maxDistortion)
+					m_hypotheses.push_back(&hypo);
+			}
+	
+		++iterHypo;
+	}
+
+	if (m_translations.size() > 1)
+	{
+		assert(m_translations.Get(0)->GetFutureScore() >= m_translations.Get(1)->GetFutureScore());
+	}
+
+	if (m_hypotheses.size() > 1)
+	{
+		assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
+	}	
+
+	HypothesisScoreOrdererWithDistortion::transOptRange = &transOptRange;
+	std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererWithDistortion());
+
+	// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
+}
+
+BackwardsEdge::~BackwardsEdge()
+{
+	m_seenPosition.clear();
+	m_hypotheses.clear();
+}
+
+
+void
+BackwardsEdge::Initialize()
+{
+	if(m_hypotheses.size() == 0 || m_translations.size() == 0)
+	{
+		m_initialized = true;
+		return;
+	}
+
+	Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
+	m_parent.Enqueue(0, 0, expanded, this);
+	SetSeenPosition(0, 0);
+	m_initialized = true;
+}
+
+Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt) 
+{
+	// create hypothesis and calculate all its scores
+	Hypothesis *newHypo = hypothesis.CreateNext(transOpt, NULL); // TODO FIXME This is absolutely broken - don't pass null here
+
+	// expand hypothesis further if transOpt was linked
+	std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin();
+	std::vector<TranslationOption*>::const_iterator iterEnd = transOpt.GetLinkedTransOpts().end();
+
+	while (iterLinked != iterEnd)
+	{
+		const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap();
+		if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) {
+			// don't want to add a hypothesis that has some but not all of a linked TO set, so return
+			delete newHypo;
+			return NULL;
+		}
+		else
+		{
+			newHypo->CalcScore(m_futurescore);
+			newHypo = newHypo->CreateNext(**iterLinked, NULL); // TODO FIXME This is absolutely broken - don't pass null here
+		}
+
+		++iterLinked;
+	}
+
+	newHypo->CalcScore(m_futurescore);
+	
+	return newHypo;
+}
+
+bool
+BackwardsEdge::SeenPosition(const size_t x, const size_t y)
+{
+  std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
+	return (iter != m_seenPosition.end());
+}
+
+void
+BackwardsEdge::SetSeenPosition(const size_t x, const size_t y)
+{
+  assert(x < (1<<17));
+  assert(y < (1<<17));
+
+	m_seenPosition.insert((x<<16) + y);
+}
+
+
+bool
+BackwardsEdge::GetInitialized()
+{
+	return m_initialized;
+}
+
+const BitmapContainer&
+BackwardsEdge::GetBitmapContainer() const
+{
+	return m_prevBitmapContainer;
+}
+
+void
+BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
+{
+	Hypothesis *newHypo;
+	
+	if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) {
+		SetSeenPosition(x, y + 1);
+		newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1));
+		if(newHypo != NULL)
+		{
+			m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this);
+		}
+	}
+
+	if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) {
+	  SetSeenPosition(x + 1, y);
+		newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y));
+		if(newHypo != NULL)
+		{
+			m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this);
+		}
+	}	
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// BitmapContainer Code
+////////////////////////////////////////////////////////////////////////////////
+
+BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
+																 , HypothesisStackCubePruning &stack)
+  : m_bitmap(bitmap)
+  , m_stack(stack)
+	, m_numStackInsertions(0)
+{
+	m_hypotheses = HypothesisSet();
+	m_edges = BackwardsEdgeSet();
+	m_queue = HypothesisQueue();
+}
+
+BitmapContainer::~BitmapContainer()
+{
+	// As we have created the square position objects we clean up now.
+	HypothesisQueueItem *item = NULL;
+
+	while (!m_queue.empty())
+	{
+		item = m_queue.top();
+		FREEHYPO(item->GetHypothesis());
+		delete item;
+		m_queue.pop();
+	}
+
+	// Delete all edges.
+	RemoveAllInColl(m_edges);
+
+	m_hypotheses.clear();
+	m_edges.clear();
+}
+
+
+void
+BitmapContainer::Enqueue(int hypothesis_pos
+												 , int translation_pos
+												 , Hypothesis *hypothesis
+												 , BackwardsEdge *edge)
+{
+	HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
+																										  , translation_pos
+																										  , hypothesis
+																											, edge);
+	m_queue.push(item);
+}
+
+HypothesisQueueItem*
+BitmapContainer::Dequeue(bool keepValue)
+{
+	if (!m_queue.empty())
+	{
+		HypothesisQueueItem *item = m_queue.top();
+
+		if (!keepValue)
+		{
+			m_queue.pop();
+		}
+
+		return item;
+	}
+
+	return NULL;
+}
+
+HypothesisQueueItem*
+BitmapContainer::Top() const
+{
+	return m_queue.top();
+}
+
+size_t
+BitmapContainer::Size()
+{
+	return m_queue.size();
+}
+
+bool
+BitmapContainer::Empty() const
+{
+	return m_queue.empty();
+}
+
+
+const WordsBitmap&
+BitmapContainer::GetWordsBitmap()
+{
+	return m_bitmap;
+}
+
+const HypothesisSet&
+BitmapContainer::GetHypotheses() const
+{
+	return m_hypotheses;
+}
+
+size_t
+BitmapContainer::GetHypothesesSize() const
+{
+	return m_hypotheses.size();
+}
+
+const BackwardsEdgeSet&
+BitmapContainer::GetBackwardsEdges()
+{
+	return m_edges;
+}
+
+void
+BitmapContainer::AddHypothesis(Hypothesis *hypothesis)
+{
+	bool itemExists = false;
+	HypothesisSet::const_iterator iter = m_hypotheses.begin();
+	HypothesisSet::const_iterator iterEnd = m_hypotheses.end();
+
+	// cfedermann: do we actually need this check?
+	while (iter != iterEnd)
+	{
+		if (*iter == hypothesis) {
+			itemExists = true;
+			break;
+		}
+
+		++iter;
+	}
+	assert(itemExists == false);
+	m_hypotheses.push_back(hypothesis);
+}
+
+void
+BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge)
+{
+	m_edges.insert(edge);
+}
+
+void
+BitmapContainer::InitializeEdges()
+{
+	BackwardsEdgeSet::iterator iter = m_edges.begin();
+	BackwardsEdgeSet::iterator iterEnd = m_edges.end();
+
+	while (iter != iterEnd)
+	{
+		BackwardsEdge *edge = *iter;
+		edge->Initialize();
+
+		++iter;
+	}
+}
+
+void
+BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps)
+{
+	while ((!Empty()) && m_numStackInsertions < minNumHyps)
+	{
+		ProcessBestHypothesis();
+	}
+}
+
+void
+BitmapContainer::ProcessBestHypothesis()
+{
+	if (m_queue.empty())
+		{
+			return;
+		}
+
+	// Get the currently best hypothesis from the queue.
+	HypothesisQueueItem *item = Dequeue();
+		
+	// If the priority queue is exhausted, we are done and should have exited
+	assert(item != NULL);
+		
+	// check we are pulling things off of priority queue in right order
+	if (!Empty())
+		{
+			HypothesisQueueItem *check = Dequeue(true);
+			assert(item->GetHypothesis()->GetTotalScore() >= check->GetHypothesis()->GetTotalScore());
+		}
+
+	// Logging for the criminally insane
+	IFVERBOSE(3) {
+		//		const StaticData &staticData = StaticData::Instance();
+		item->GetHypothesis()->PrintHypothesis();
+	}
+
+	// Add best hypothesis to hypothesis stack.
+	const bool newstackentry = m_stack.AddPrune(item->GetHypothesis());	
+	if (newstackentry)
+		m_numStackInsertions++;
+
+	IFVERBOSE(3) {
+		TRACE_ERR("new stack entry flag is " << newstackentry << std::endl);
+	}
+
+	// Create new hypotheses for the two successors of the hypothesis just added.
+	item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos());
+
+	// We are done with the queue item, we delete it.
+	delete item;
+}
+
+void
+BitmapContainer::SortHypotheses()
+{
+	std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer());
+}
+
+}
+