Added new class for SearchMultilane-multi

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/lane-multi@2190 1f5c12ca-751b-0410-a591-d2e778427230
author: dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230> 2009-02-18 21:38:36 +0300
committer: dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230> 2009-02-18 21:38:36 +0300
commit: d52e3f332fbe134365f19363703c0c9bfdc732f5 (patch)
tree: 28a2d87b000dc531f123455d98e498434b12ac1d
parent: 7aff1789c99c6d10dc25bfb71c3e63281dd79857 (diff)
7 files changed, 769 insertions, 39 deletions
diff --git a/moses/SearchMulti.cpp b/moses/SearchMulti.cpp
new file mode 100644
index 000000000..257aace96
--- /dev/null
+++ b/moses/SearchMulti.cpp
@@ -0,0 +1,630 @@
+#include "Timer.h"
+#include "SearchMulti.h"
+
+namespace Moses
+{
+	/** Set up the search state for one sentence pair and allocate source.GetSize() + 1 hypothesis stacks.
+	 * \param source first input sentence; its size fixes the number of stacks and its id selects the constraint
+	 * \param source1 second input sentence (stored and logged here, not otherwise read by this constructor)
+	 * \param transOptColl collection of translation options to be used for this sentence
+	 * \param transOptColl1 second collection of translation options (only stored by this constructor)
+	 */
+	SearchMulti::SearchMulti(const InputType &source, const InputType &source1, const TranslationOptionCollection &transOptColl, const TranslationOptionCollection &transOptColl1)
+	:m_source(source)
+	,m_source1(source1)
+	//,m_hypoStackColl((source.GetSize() + 1) * (source1.GetSize() + 1))
+	,m_hypoStackColl((source.GetSize() + 1))
+	,m_initialTargetPhrase(Output)
+	,m_start(clock())
+	,interrupted_flag(0)
+	,actual_hypoStack(NULL) // no stack has been fully expanded yet; assigned in AttemptProcessSentence()
+	,m_transOptColl(transOptColl)
+	,m_transOptColl1(transOptColl1)
+	{
+		VERBOSE(1, "Translating: " << m_source <<
+				" ...and also: " << m_source1 << endl);
+		const StaticData &staticData = StaticData::Instance();
+		
+		// only if constraint decoding (having to match a specified output)
+		long sentenceID = source.GetTranslationId();
+		m_constraint = staticData.GetConstrainingPhrase(sentenceID);
+		m_WERLimit = staticData.GetWERLimit();
+		if (m_WERLimit < 0.0f) m_WERUnlimited = true; // a negative limit is treated as "no WER limit"
+		else m_WERUnlimited = false;
+		
+		// initialize the stacks: create data structure and set limits
+		for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
+		{
+			HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal();
+			sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity());
+			sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
+			
+			m_hypoStackColl[ind] = sourceHypoColl;
+		}
+	}
+	
+	SearchMulti::~SearchMulti()
+	{
+		RemoveAllInColl(m_hypoStackColl); // releases the stacks allocated with new in the constructor (presumably deletes each element - confirm against RemoveAllInColl)
+	}
+	
+	/**
+	 * Entry point for decoding one sentence: currently a plain call to AttemptProcessSentence().
+	 * The loop that would relax the WER limit step by step for constraint decoding is commented out below.
+	 */
+	void SearchMulti::ProcessSentence()
+	{
+		/*
+		 if (m_constraint!=NULL && m_WERUnlimited) {
+		 // If attempting constraint decoding with unlimited WER allowed,
+		 //    keep increasing allowed WER until a result is obtained.
+		 for (m_WERLimit=0; GetBestHypothesis()==NULL; m_WERLimit++) {
+		 VERBOSE(1, "WER Limit = " << m_WERLimit << endl);
+		 AttemptProcessSentence();
+		 }
+		 //VERBOSE(1, "WER Limit = " << m_WERLimit << "  GetBestHypothesis()==" << *GetBestHypothesis() << endl);
+		 } else {
+		 AttemptProcessSentence();
+		 }
+		 */
+		AttemptProcessSentence();
+	}
+	
+	
+	void SearchMulti::AttemptProcessSentence() // one decoding pass: seed stack 0, then prune and expand the stacks in order until done or out of time
+	{
+		const StaticData &staticData = StaticData::Instance();
+		SentenceStats &stats = staticData.GetSentenceStats();
+		clock_t t=0; // used to track time for steps
+		
+		// initial seed hypothesis: nothing translated, no words produced (created from m_source only)
+		Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
+		m_hypoStackColl[0]->AddPrune(hypo);
+		
+		// go through each stack
+		std::vector < HypothesisStack* >::iterator iterStack;
+		for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack)
+		{
+			// check if decoding ran out of time
+			double _elapsed_time = GetUserTime();
+			if (_elapsed_time > staticData.GetTimeoutThreshold()){
+				VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
+				interrupted_flag = 1; // GetBestHypothesis() then reads actual_hypoStack instead of the last stack
+				return;
+			}
+			HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack); // every stack was created as a HypothesisStackNormal in the constructor
+			
+			// the stack is pruned before processing (lazy pruning):
+			VERBOSE(3,"processing hypothesis from next stack");
+			IFVERBOSE(2) { t = clock(); }
+			sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
+			VERBOSE(3,std::endl);
+			sourceHypoColl.CleanupArcList();
+			IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); }
+			
+			// go through each hypothesis on the stack and try to expand it
+			HypothesisStackNormal::const_iterator iterHypo;
+			for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
+			{
+				Hypothesis &hypothesis = **iterHypo;
+				ProcessOneHypothesis(hypothesis); // expand the hypothesis
+			}
+			// some logging
+			IFVERBOSE(2) { OutputHypoStackSize(); }
+			
+			// this stack is fully expanded; remember it as the fallback result stack for a later time-out
+			actual_hypoStack = &sourceHypoColl;
+		}
+		
+		// some more logging
+		IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
+		VERBOSE(2, staticData.GetSentenceStats());
+	}
+	
+	
+	/** Find all source spans whose translation options may extend one hypothesis, and trigger the expansion.
+	 * This is mostly a check for overlap with already covered words, and for
+	 * violation of reordering limits (plus connectivity checks for word-lattice input); only m_source and m_transOptColl are consulted.
+	 * \param hypothesis hypothesis to be expanded upon
+	 */
+	void SearchMulti::ProcessOneHypothesis(const Hypothesis &hypothesis)
+	{
+		// since we check for reordering limits, it's good to have that limit handy
+		int maxDistortion = StaticData::Instance().GetMaxDistortion();
+		bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
+		
+		// no limit of reordering (negative limit): only check for overlap
+		if (maxDistortion < 0)
+		{
+			const WordsBitmap hypoBitmap	= hypothesis.GetWordsBitmap();
+			const size_t hypoFirstGapPos	= hypoBitmap.GetFirstGapPos()
+			, sourceSize			= m_source.GetSize();
+			
+			for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+			{
+				size_t maxSize = sourceSize - startPos;
+				size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+				maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; // longest span to try: capped by the sentence end and the maximum phrase length
+				
+				for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+				{
+					// basic checks
+				    // there have to be translation options
+					if (m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)).size() == 0 ||
+						// no overlap with existing words
+						hypoBitmap.Overlap(WordsRange(startPos, endPos)) ||
+						// specified reordering constraints (set with -monotone-at-punctuation or xml)
+						!m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) )
+					{
+						continue;
+					}
+					
+					//TODO: does this method include incompatible WordLattice hypotheses?
+					ExpandAllHypotheses(hypothesis, startPos, endPos);
+				}
+			}
+			
+			return; // done with special case (no reordering limit)
+		}
+		
+		// if there are reordering limits, make sure it is not violated
+		// the coverage bitmap is handy here (and the position of the first gap)
+		const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
+		const size_t	hypoFirstGapPos	= hypoBitmap.GetFirstGapPos()
+		, sourceSize			= m_source.GetSize();
+		
+		// MAIN LOOP. go through each possible range
+		for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+		{
+			size_t maxSize = sourceSize - startPos;
+			size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+			maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; // same cap as in the unlimited-reordering branch above
+			size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
+			if (isWordLattice) {
+				// first question: is there a path from the closest translated word to the left
+				// of the hypothesized extension to the start of the hypothesized extension?
+				// long version: is there anything to our left? is it farther left than where we're starting anyway? can we get to it?
+				// closestLeft is exclusive: a value of 3 means 2 is covered, our arc is currently ENDING at 3 and can start at 3 implicitly
+				if (closestLeft != 0 && closestLeft != startPos && !m_source.CanIGetFromAToB(closestLeft, startPos)) {
+					continue;
+				}
+			}
+			
+			for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
+			{
+				// basic checks
+				WordsRange extRange(startPos, endPos);
+			    // there have to be translation options
+				if (m_transOptColl.GetTranslationOptionList(extRange).size() == 0 ||
+					// no overlap with existing words
+					hypoBitmap.Overlap(extRange) ||
+					// specified reordering constraints (set with -monotone-at-punctuation or xml)
+					!m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) || //
+					// connection in input word lattice
+					(isWordLattice && !m_source.IsCoveragePossible(extRange)))
+				{
+					continue;
+				}
+				
+				// ask second question here:
+				// we already know we can get to our starting point from the closest thing to the left. We now ask the follow up:
+				// can we get from our end to the closest thing on the right?
+				// long version: is anything to our right? is it farther right than our (inclusive) end? can our end reach it?
+				bool leftMostEdge = (hypoFirstGapPos == startPos);
+				
+				// closest right definition:
+				size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
+				if (isWordLattice) {
+					//if (!leftMostEdge && closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
+					if (closestRight != endPos && ((closestRight + 1) < sourceSize) && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
+						continue;
+					}
+				}
+				
+				// any length extension is okay if starting at left-most edge
+				if (leftMostEdge)
+				{
+					ExpandAllHypotheses(hypothesis, startPos, endPos);
+				}
+				// starting somewhere other than left-most edge, use caution
+				else
+				{
+					// the basic idea is this: we would like to translate a phrase starting
+					// from a position further right than the left-most open gap. The
+					// distortion penalty for the following phrase will be computed relative
+					// to the ending position of the current extension, so we ask now what
+					// its maximum value will be (which will always be the value of the
+					// hypothesis starting at the left-most edge).  If this value is greater than
+					// the distortion limit, we don't allow this extension to be made.
+					WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
+					int required_distortion =
+					m_source.ComputeDistortionDistance(extRange, bestNextExtension);
+					
+					if (required_distortion > maxDistortion) {
+						continue;
+					}
+					
+					// everything is fine, we're good to go
+					ExpandAllHypotheses(hypothesis, startPos, endPos);
+					
+				}
+			}
+		}
+	}
+	
+	
+	/**
+	 * Expand a hypothesis with every translation option available for one source span,
+	 * filtering the options by the output constraint (exact match or WER limit) when one is set
+	 * \param hypothesis hypothesis to be expanded upon
+	 * \param startPos first word position of span covered
+	 * \param endPos last word position of span covered
+	 */
+	void SearchMulti::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
+	{
+		// early discarding: check if hypothesis is too bad to build
+		// this idea is explained in (Moore&Quirk, MT Summit 2007)
+		float expectedScore = 0.0f;
+		if (StaticData::Instance().UseEarlyDiscarding())
+		{
+			// expected score is based on score of current hypothesis
+			expectedScore = hypothesis.GetScore();
+			
+			// add new future score estimate
+			expectedScore += m_transOptColl.GetFutureScore().CalcFutureScore( hypothesis.GetWordsBitmap(), startPos, endPos );
+		}
+		
+		//bool foundSomething = false;
+		
+		// loop through all translation options
+		const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos));
+		TranslationOptionList::const_iterator iter;
+		for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter)
+		{
+			//LS		ExpandHypothesis(hypothesis, **iter, expectedScore);
+			if (m_constraint == NULL) {
+				ExpandHypothesis(hypothesis, **iter, expectedScore);
+			}
+			else if (m_constraint != NULL && m_WERLimit == 0.0f) {
+				if (isCompatibleWithConstraint(hypothesis, **iter) ) {
+					//(**iter).
+					ExpandHypothesis(hypothesis, **iter, expectedScore);
+					//foundSomething = true;
+				} //else {
+				//	VERBOSE(1,"Expanding incompatible hypothesis" << endl);
+				//	 ExpandHypothesis(hypothesis, **iter, expectedScore);
+				//}
+				
+			} else {
+				TargetPhrase *curTarget = getCurrentTargetPhrase(hypothesis);
+				//LS			float currConstraintWER = getCurrConstraintWER(hypothesis, **iter);
+				float currConstraintWER = getCurrConstraintWER(curTarget, **iter);
+				//LS			VERBOSE(1, "WER==" << currConstraintWER << " for \"" << static_cast<const Phrase&>(*curTarget) << "\"" << endl);
+				delete curTarget; // getCurrentTargetPhrase() returns a phrase allocated with new that we own; without this it leaks once per option
+				if (currConstraintWER <= m_WERLimit)
+					ExpandHypothesis(hypothesis, **iter, expectedScore);
+			}
+		}
+		/*
+		 if (m_constraint!=NULL && !foundSomething) {
+		 size_t start = 1 + hypothesis.GetCurrTargetWordsRange().GetEndPos();
+		 const WordsRange range(start, start);
+		 const Phrase &relevantConstraint = m_constraint->GetSubString(range);
+		 //const TargetPhrase tp(relevantConstraint);
+		 Phrase sourcePhrase(Input);
+		 std::string targetPhraseString("hi");
+		 
+		 vector<FactorType> 	output	= Tokenize<FactorType>("0,", ",");
+		 //const vector<FactorType> 	output();
+		 const StaticData &staticData = StaticData::Instance();
+		 TargetPhrase targetPhrase(Output);
+		 //targetPhrase.AddWord(<#const Word newWord#>)
+		 targetPhrase.SetSourcePhrase(&sourcePhrase);
+		 targetPhrase.CreateFromString( output, targetPhraseString, staticData.GetFactorDelimiter());
+		 
+		 TranslationOption newOpt(range, targetPhrase, m_source);
+		 
+		 
+		 VERBOSE(2, "Should we add \"" << relevantConstraint << "\" (" << start << "-" << start << ") " << newOpt << endl);
+		 
+		 ExpandHypothesis(hypothesis, newOpt, expectedScore);
+		 }
+		 VERBOSE(2, "Found something==" << foundSomething << " for " << startPos << "-" << endPos << endl);
+		 */
+	}
+	
+	/** Check whether appending a translation option keeps the output compatible with the constraint phrase.
+	 * \param hypothesis hypothesis to be expanded upon
+	 * \param transOpt translation option whose target phrase would be appended
+	 * \return true if the option's target phrase is empty or IsCompatible() with the constraint words at the positions it would occupy; false if it mismatches or would run past the end of the constraint
+	 */
+	
+	bool SearchMulti::isCompatibleWithConstraint(const Hypothesis &hypothesis, 
+												 const TranslationOption &transOpt) 
+	{
+		size_t constraintSize = m_constraint->GetSize();
+		size_t start = 1 + hypothesis.GetCurrTargetWordsRange().GetEndPos(); // first target position the new phrase would occupy
+		const Phrase &transOptPhrase = transOpt.GetTargetPhrase();
+		size_t transOptSize = transOptPhrase.GetSize();
+		
+		if (transOptSize==0) {
+			VERBOSE(4, "Empty transOpt IS COMPATIBLE with constraint \"" <<  *m_constraint << "\"" << endl);
+			return true;
+		}
+		size_t endpoint = start + transOptSize - 1; // last target position the new phrase would occupy (inclusive)
+		//size_t endpoint = start + transOptSize;
+		//if (endpoint > 0) endpoint = endpoint - 1;
+		WordsRange range(start, endpoint);
+		
+		if (endpoint >= constraintSize) {
+			VERBOSE(4, "Appending \"" << transOptPhrase << "\" after \"" << static_cast<const Phrase&>(hypothesis.GetTargetPhrase()) << "\" (start=" << start << ", endpoint=" << endpoint << ", transOptSize=" << transOptSize << ") would be too long for constraint \"" <<  *m_constraint << "\"" << endl);
+			return false;
+		} else {
+			const Phrase &relevantConstraint = m_constraint->GetSubString(range); // the slice of the constraint aligned with the new phrase
+			if ( ! relevantConstraint.IsCompatible(transOptPhrase) ) {
+				VERBOSE(4, "\"" << transOptPhrase << "\" is incompatible with \"" <<  relevantConstraint << "\" (" << start << "-" << endpoint << ")" << endl);
+				return false;
+			} else {
+				VERBOSE(4, "\"" << transOptPhrase << "\" IS COMPATBILE with \"" <<  relevantConstraint << "\"" << endl);
+				return true;
+			}
+		}
+	}
+	
+	TargetPhrase *SearchMulti::getCurrentTargetPhrase(const Hypothesis &hypothesis) 
+	{ 
+		// Walk the back-pointers from this hypothesis to the seed, collecting
+		// the target phrase of each step (most recent step first)
+		std::vector<Phrase> newestFirst;
+		for (const Hypothesis *step = &hypothesis; step != NULL; step = step->GetPrevHypo()) {
+			newestFirst.push_back(step->GetCurrTargetPhrase());
+		}
+		
+		// Join the pieces in translation order into a freshly allocated phrase;
+		// the result comes from new, so the caller is responsible for deleting it
+		TargetPhrase *joined = new TargetPhrase();
+		std::vector<Phrase>::reverse_iterator piece;
+		for (piece = newestFirst.rbegin(); piece != newestFirst.rend(); ++piece) {
+			joined->Append(*piece);
+		}
+		
+		return joined;
+	}
+	
+	float SearchMulti::getCurrConstraintWER(TargetPhrase *curTarget, // target words produced so far; copied here, neither modified nor freed
+											const TranslationOption &transOpt) // option whose target phrase is appended to that copy
+	{
+		
+		//const size_t constraintSize = m_constraint->GetSize();
+		const TargetPhrase transOptPhrase = transOpt.GetTargetPhrase();
+		
+		
+		TargetPhrase newTarget = TargetPhrase(*curTarget);
+		newTarget.Append(transOptPhrase);
+		
+		//size_t endpoint = newTarget.GetSize() - 1;
+		
+		// Account for target strings that are longer than the reference
+		//if (endpoint >= constraintSize)
+		//	endpoint = constraintSize - 1;
+		
+		// Extract relevant constraint...
+		//WordsRange range(0, endpoint);
+		//const Phrase &relevantConstraint = m_constraint->GetSubString(range);
+		
+		
+		// Edit distance between the whole constraint and the extended target, then reduced by the size difference (constraint size minus target size)
+		//float editDistance = computeEditDistance(relevantConstraint, newTarget);
+		float editDistance = computeEditDistance(*m_constraint, newTarget);
+		float normalizedEditDistance = editDistance - (m_constraint->GetSize() - newTarget.GetSize()); // NOTE(review): if GetSize() returns an unsigned type, this difference wraps to a huge value when the target is longer than the constraint - confirm
+		normalizedEditDistance = (normalizedEditDistance<0) ? editDistance : normalizedEditDistance; // a negative result falls back to the raw edit distance
+		//VERBOSE(1, "m_constraint->GetSize() - newTarget.GetSize() == " << m_constraint->GetSize() << " - " << newTarget.GetSize() << endl);
+		VERBOSE(2, "WER==" << normalizedEditDistance << " (" << editDistance << ") for \"" << static_cast<const Phrase&>(newTarget) << "\" with constraint \"" << *m_constraint << "\"" << endl);
+		
+		//return editDistance;
+		return normalizedEditDistance;
+	}
+	
+	/** Word-level edit distance (unit-cost insert/delete/substitute) between two phrases; two words match when IsCompatible() holds. */
+	float SearchMulti::computeEditDistance(const Phrase &hypPhrase, const Phrase &constraintPhrase) const
+	{
+		const size_t len1 = hypPhrase.GetSize(), len2 = constraintPhrase.GetSize();
+		vector<vector<unsigned int> > d(len1 + 1, vector<unsigned int>(len2 + 1)); // d[i][j]: distance between the first i and the first j words
+		
+		// size_t indices: the bounds are size_t, so int counters mixed signed and unsigned comparisons
+		for(size_t i = 0; i <= len1; ++i) d[i][0] = i;
+		for(size_t j = 0; j <= len2; ++j) d[0][j] = j;
+		
+		for(size_t i = 1; i <= len1; ++i) {
+			WordsRange s1range(i-1, i-1);
+			const Phrase hypWord = hypPhrase.GetSubString(s1range); // depends on i only, so it is extracted once per row
+			for(size_t j = 1; j <= len2; ++j) {
+				WordsRange s2range(j-1, j-1);
+				const unsigned int cost = hypWord.IsCompatible(constraintPhrase.GetSubString(s2range)) ? 0 : 1;
+				d[i][j] = std::min( std::min(d[i - 1][j] + 1,
+											 d[i][j - 1] + 1),
+								   d[i - 1][j - 1] + cost);
+			}
+		}
+		return d[len1][len2];
+	}
+	
+	/**
+	 * Expand one hypothesis with a translation option.
+	 * this involves initial creation, scoring and adding it to the proper stack
+	 * \param hypothesis hypothesis to be expanded upon
+	 * \param transOpt translation option (phrase translation)
+	 *        that is applied to create the new hypothesis
+	 * \param expectedScore base score for early discarding
+	 *        (base hypothesis score plus future score estimation); only read when early discarding is enabled
+	 */
+	void SearchMulti::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
+	{
+		const StaticData &staticData = StaticData::Instance();
+		SentenceStats &stats = staticData.GetSentenceStats();
+		clock_t t=0; // used to track time for steps
+		
+		Hypothesis *newHypo;
+		if (! staticData.UseEarlyDiscarding())
+		{
+			// simple build, no questions asked
+			IFVERBOSE(2) { t = clock(); }
+			//LS		newHypo = hypothesis.CreateNext(transOpt, m_constraint);
+			newHypo = hypothesis.CreateNext(transOpt);
+			IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); }
+			if (newHypo==NULL) return;
+			newHypo->CalcScore(m_transOptColl.GetFutureScore());
+			//newHypo->IncrementTotalScore(bonus);
+		}
+		else
+			// early discarding: check if hypothesis is too bad to build
+		{
+			// worst possible score may have changed -> recompute
+			size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize(); // index of the stack the new hypothesis would be added to
+			float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
+			if (staticData.GetMinHypoStackDiversity())
+			{
+				WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
+				float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
+				allowedScore = std::min( allowedScore, allowedScoreForBitmap );
+			}
+			allowedScore += staticData.GetEarlyDiscardingThreshold();
+			
+			// add expected score of translation option
+			expectedScore += transOpt.GetFutureScore();
+			// TRACE_ERR("EXPECTED diff: " << (newHypo->GetTotalScore()-expectedScore) << " (pre " << (newHypo->GetTotalScore()-expectedScorePre) << ") " << hypothesis.GetTargetPhrase() << " ... " << transOpt.GetTargetPhrase() << " [" << expectedScorePre << "," << expectedScore << "," << newHypo->GetTotalScore() << "]" << endl);
+			//expectedScore += bonus;
+			// check whether the translation option's score already pushes it below the limit
+			if (expectedScore < allowedScore)
+			{
+				IFVERBOSE(2) { stats.AddNotBuilt(); }
+				return;
+			}
+			
+			// build the hypothesis without scoring
+			IFVERBOSE(2) { t = clock(); }
+			//LS		newHypo = hypothesis.CreateNext(transOpt, m_constraint);
+			newHypo = hypothesis.CreateNext(transOpt);
+			if (newHypo==NULL) return; // note: returns before the build time is added to the statistics
+			IFVERBOSE(2) { stats.AddTimeBuildHyp( clock()-t ); }
+			
+			// compute expected score (all but correct LM)
+			expectedScore = newHypo->CalcExpectedScore( m_transOptColl.GetFutureScore() );
+			
+			// ... and check if that is below the limit
+			if (expectedScore < allowedScore)
+			{
+				IFVERBOSE(2) { stats.AddEarlyDiscarded(); }
+				FREEHYPO( newHypo );
+				return;
+			}
+			
+			// ok, all is good, compute remaining scores
+			newHypo->CalcRemainingScore();
+			//newHypo->IncrementTotalScore(bonus);
+		}
+		
+		// logging for the curious
+		IFVERBOSE(3) {
+			newHypo->PrintHypothesis();
+		}
+		
+		// add to hypothesis stack (the stack is chosen by the number of source words covered)
+		size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
+		IFVERBOSE(2) { t = clock(); }
+		m_hypoStackColl[wordsTranslated]->AddPrune(newHypo);
+		IFVERBOSE(2) { stats.AddTimeStack( clock()-t ); }
+	}
+	
+	const std::vector < HypothesisStack* >& SearchMulti::GetHypothesisStacks() const
+	{
+		return m_hypoStackColl; // read-only reference to the member vector; the stacks remain owned by this object
+	}
+	
+	/**
+	 * Find the best hypothesis: on the last stack after an uninterrupted run (only hypotheses that satisfy the constraint, if one is set; NULL if none does),
+	 * or on the last fully expanded stack if decoding ran out of time. This is the end point of the best translation, which can be traced back from here
+	 */
+	const Hypothesis *SearchMulti::GetBestHypothesis() const
+	{
+		/*LS
+		 if (interrupted_flag == 0){
+		 const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back());
+		 return hypoColl.GetBestHypothesis();
+		 }
+		 else{
+		 const HypothesisStackNormal &hypoColl = *actual_hypoStack;
+		 return hypoColl.GetBestHypothesis();
+		 }
+		 */
+		
+		if (interrupted_flag == 0){
+			const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back());
+			
+			if (m_constraint != NULL) {
+				HypothesisStackNormal::const_iterator iter;
+				
+				const Hypothesis *bestHypo = NULL;
+				
+				
+				for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter)
+				{
+					const Hypothesis *hypo = *iter;
+					WordsRange range(0, m_constraint->GetSize() - 1);
+					Phrase constraint = m_constraint->GetSubString(range); // NOTE(review): does not depend on the loop variable; could be built once before the loop
+					
+					if (hypo != NULL) {
+						TargetPhrase targetPhrase = TargetPhrase(hypo->GetCurrTargetPhrase()); // rebuild the full output by prepending each earlier step's phrase
+						hypo = hypo->GetPrevHypo();
+						while (hypo != NULL) {
+							TargetPhrase newTargetPhrase = TargetPhrase(hypo->GetCurrTargetPhrase());
+							newTargetPhrase.Append(targetPhrase);
+							targetPhrase = newTargetPhrase;
+							hypo = hypo->GetPrevHypo();
+						}
+						
+						if ( m_WERLimit != 0.0f ) { // is WER-constraint active?
+							//VERBOSE(1, "constraint  : " << constraint << endl);
+							//VERBOSE(1, "targetPhrase: " << targetPhrase << endl);
+							if (computeEditDistance(constraint, targetPhrase) <= m_WERLimit) { // NOTE(review): with a negative (unlimited) m_WERLimit this test can never pass - confirm intended
+								//VERBOSE(1, "TRUE" << endl);
+								if (bestHypo==NULL || (*iter)->GetTotalScore() > bestHypo->GetTotalScore())
+									bestHypo = *iter;
+							} else {
+								//VERBOSE(1, "FALSE" << endl);
+							}
+						} else {
+							if (constraint.IsCompatible(targetPhrase) &&
+								(bestHypo==NULL || (*iter)->GetTotalScore() > bestHypo->GetTotalScore()))
+								bestHypo = *iter;
+						}
+					}
+				}
+				return bestHypo;
+				//return NULL;
+			} else {
+				return hypoColl.GetBestHypothesis();
+			}
+		}
+		else{
+			const HypothesisStackNormal &hypoColl = *actual_hypoStack; // NOTE(review): actual_hypoStack is only assigned in AttemptProcessSentence() after a stack has been expanded - confirm a time-out cannot happen before that
+			return hypoColl.GetBestHypothesis();
+		}
+		
+	}
+	
+	/**
+	 * Trace the current size of every hypothesis stack, comma separated on a single line
+	 */
+	void SearchMulti::OutputHypoStackSize()
+	{
+		const size_t numStacks = m_hypoStackColl.size();
+		TRACE_ERR( "Stack sizes: " << (int)m_hypoStackColl[0]->size());
+		for (size_t stackNo = 1; stackNo < numStacks; ++stackNo)
+		{
+			TRACE_ERR( ", " << (int)m_hypoStackColl[stackNo]->size());
+		}
+		TRACE_ERR( endl);
+	}
+	
+}
+
diff --git a/moses/SearchMulti.h b/moses/SearchMulti.h
new file mode 100644
index 000000000..8ede8dc2a
--- /dev/null
+++ b/moses/SearchMulti.h
@@ -0,0 +1,66 @@
+/*
+ *  SearchMulti.h
+ *  moses
+ *
+ *  Created by Lane Schwartz on 2/15/09.
+ *  Copyright 2009.
+ *
+ */
+
+
+#pragma once
+
+#include <vector>
+#include "Search.h"
+#include "HypothesisStackNormal.h"
+#include "TranslationOptionCollection.h"
+#include "Timer.h"
+
+namespace Moses
+{
+	
+	class InputType;
+	class TranslationOptionCollection;
+	
+	class SearchMulti: public Search // stack-based search that is handed two inputs and two translation option collections
+		{
+		protected:
+			const InputType &m_source; /**< first input sentence */
+			const InputType &m_source1; /**< second input sentence */
+			std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */ 
+			// no of elements = no of words in source + 1
+			TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
+			clock_t m_start; /**< starting time, used for logging */
+			size_t interrupted_flag; /**< flag indicating that decoder ran out of time (see switch -time-out) */
+			HypothesisStackNormal* actual_hypoStack; /**< most recent fully expanded stack of hypotheses */ 
+			const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
+			const TranslationOptionCollection &m_transOptColl1; /**< second pre-computed list of translation options (presumably for m_source1 - confirm with caller) */
+			
+			// functions for creating hypotheses
+			void ProcessOneHypothesis(const Hypothesis &hypothesis);
+			void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
+			void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
+			
+			bool isCompatibleWithConstraint(const Hypothesis &hypothesis, const TranslationOption &transOpt);
+			//LS float SearchNormal::getCurrConstraintWER(const Hypothesis &hypothesis, const TranslationOption &transOpt); 
+			TargetPhrase* getCurrentTargetPhrase(const Hypothesis &hypothesis); // result is allocated with new; the caller must delete it
+			float getCurrConstraintWER(TargetPhrase *targetphrase, const TranslationOption &transOpt);
+			float computeEditDistance(const Phrase &hypPhrase, const Phrase &constraintPhrase) const;
+			
+			void AttemptProcessSentence();
+			
+		public:
+			SearchMulti(const InputType &source, const InputType &source1, const TranslationOptionCollection &transOptColl, const TranslationOptionCollection &transOptColl1);
+			~SearchMulti();
+			
+			void ProcessSentence();
+			
+			void OutputHypoStackSize();
+			void OutputHypoStack(int stack); // NOTE(review): no definition appears in SearchMulti.cpp as shown in this patch - confirm it is defined elsewhere
+			
+			virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const;
+			virtual const Hypothesis *GetBestHypothesis() const;
+		};
+	
+}
+
diff --git a/moses/moses.xcodeproj/project.pbxproj b/moses/moses.xcodeproj/project.pbxproj
index a47ebbd7a..9b71db137 100644
--- a/moses/moses.xcodeproj/project.pbxproj
+++ b/moses/moses.xcodeproj/project.pbxproj
@@ -133,6 +133,8 @@
 		1C8CFF090AD67A9700FA22E2 /* WordsBitmap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */; };
 		1C8CFF0A0AD67A9700FA22E2 /* WordsRange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */; };
 		1C8CFF0B0AD67A9700FA22E2 /* WordsRange.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */; };
+		3D0200CF0F4C58E6007FCB1D /* SearchMulti.h in Headers */ = {isa = PBXBuildFile; fileRef = 3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */; };
+		3D0200D00F4C58E6007FCB1D /* SearchMulti.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */; };
 		3D5CBEEF0F2F683D004520C1 /* LanguageModelRemote.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D5CBEEB0F2F683D004520C1 /* LanguageModelRemote.cpp */; };
 		3D5CBEF00F2F683D004520C1 /* LanguageModelRemote.h in Headers */ = {isa = PBXBuildFile; fileRef = 3D5CBEEC0F2F683D004520C1 /* LanguageModelRemote.h */; };
 		B219B8610E9382EE00EAB407 /* AlignmentElement.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B219B85B0E9382EE00EAB407 /* AlignmentElement.cpp */; };
@@ -302,6 +304,8 @@
 		1C8CFE8C0AD67A9700FA22E2 /* WordsBitmap.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsBitmap.h; path = src/WordsBitmap.h; sourceTree = "<group>"; };
 		1C8CFE8D0AD67A9700FA22E2 /* WordsRange.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = WordsRange.cpp; path = src/WordsRange.cpp; sourceTree = "<group>"; };
 		1C8CFE8E0AD67A9700FA22E2 /* WordsRange.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = WordsRange.h; path = src/WordsRange.h; sourceTree = "<group>"; };
+		3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SearchMulti.h; sourceTree = "<group>"; };
+		3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SearchMulti.cpp; sourceTree = "<group>"; };
 		3D5CBEE90F2F683D004520C1 /* LanguageModelRandLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelRandLM.cpp; path = src/LanguageModelRandLM.cpp; sourceTree = "<group>"; };
 		3D5CBEEA0F2F683D004520C1 /* LanguageModelRandLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LanguageModelRandLM.h; path = src/LanguageModelRandLM.h; sourceTree = "<group>"; };
 		3D5CBEEB0F2F683D004520C1 /* LanguageModelRemote.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LanguageModelRemote.cpp; path = src/LanguageModelRemote.cpp; sourceTree = "<group>"; };
@@ -411,6 +415,8 @@
 				E21C11150DFEE88800ADAED0 /* SearchCubePruning.h */,
 				E21C11160DFEE88800ADAED0 /* SearchNormal.cpp */,
 				E21C11170DFEE88800ADAED0 /* SearchNormal.h */,
+				3D0200CE0F4C58E6007FCB1D /* SearchMulti.cpp */,
+				3D0200CD0F4C58E6007FCB1D /* SearchMulti.h */,
 				E21C110A0DFEE86B00ADAED0 /* HypothesisStackCubePruning.cpp */,
 				E21C110B0DFEE86B00ADAED0 /* HypothesisStackCubePruning.h */,
 				E21C110C0DFEE86B00ADAED0 /* HypothesisStackNormal.cpp */,
@@ -658,6 +664,7 @@
 				B2639DEB0EF199D400A67519 /* ReorderingConstraint.h in Headers */,
 				B2639DED0EF199D400A67519 /* TranslationOptionList.h in Headers */,
 				3D5CBEF00F2F683D004520C1 /* LanguageModelRemote.h in Headers */,
+				3D0200CF0F4C58E6007FCB1D /* SearchMulti.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -782,6 +789,7 @@
 				B2639DEA0EF199D400A67519 /* ReorderingConstraint.cpp in Sources */,
 				B2639DEC0EF199D400A67519 /* TranslationOptionList.cpp in Sources */,
 				3D5CBEEF0F2F683D004520C1 /* LanguageModelRemote.cpp in Sources */,
+				3D0200D00F4C58E6007FCB1D /* SearchMulti.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index f656619c7..0e596d5dc 100644
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -76,7 +76,6 @@ vector<TranslationOptionCollection*> * getTranslationOptionCollections(vector<In
 	vector<TranslationOptionCollection*> * collections = new vector<TranslationOptionCollection*>(sources->size());
 	
 	for (int i=0; i<sources->size(); i++) {
-		//TranslationOptionCollection* coll = (*sources)[i]->CreateTranslationOptionCollection();
 		(*collections)[i] =  (*sources)[i]->CreateTranslationOptionCollection();
 	}
 	
@@ -97,7 +96,8 @@ vector<TranslationOptionCollection*> * getTranslationOptionCollections(vector<In
 Manager::Manager(vector<InputType const*> *sources, SearchAlgorithm searchAlgorithm)
 	:m_sources(sources)
 	,m_transOptColls(getTranslationOptionCollections(sources))
-	,m_search(Search::CreateSearch((*((*sources)[0])), searchAlgorithm, (*(*m_transOptColls)[0])))
+	//,m_search(Search::CreateSearch((*((*sources)[0])), searchAlgorithm, (*(*m_transOptColls)[0])))
+	,m_search(Search::CreateSearch(sources, searchAlgorithm, m_transOptColls))
 	,m_start(clock())
 	,interrupted_flag(0)
 	{
@@ -107,7 +107,10 @@ Manager::Manager(vector<InputType const*> *sources, SearchAlgorithm searchAlgori
 	
 Manager::~Manager() 
 {
-  delete m_transOptColls;
+	for (int i=0; i<m_transOptColls->size(); i++) {
+		delete (*m_transOptColls)[i];
+	}
+	delete m_transOptColls;
 	delete m_search;
 
 	StaticData::Instance().CleanUpAfterSentenceProcessing();      
diff --git a/moses/src/Search.cpp b/moses/src/Search.cpp
index be156700f..5b8bd4cc2 100644
--- a/moses/src/Search.cpp
+++ b/moses/src/Search.cpp
@@ -1,34 +1,54 @@
-
-#include "SearchCubePruning.h"
-#include "SearchNormal.h"
-#include "UserMessage.h"
-
-namespace Moses
-{
-Search::Search()
-{
-//	long sentenceID = m_source.GetTranslationId();
-//	m_constraint = staticData.GetConstrainingPhrase(sentenceID);
-}
-
-Search *Search::CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
-{
-	switch(searchAlgorithm)
-	{
-		case Normal:		
-			return new SearchNormal(source, transOptColl);
-		case CubePruning:
-			return new SearchCubePruning(source, transOptColl);
-		case CubeGrowing:
-			return NULL;
-		default:
-			UserMessage::Add("ERROR: search. Aborting\n");
-			abort();
-			return NULL;
-	}
-
-}
- 
-}
-
-
+
+#include "SearchCubePruning.h"
+#include "SearchNormal.h"
+#include "SearchMulti.h"
+#include "UserMessage.h"
+
+namespace Moses
+{
+Search::Search()
+{
+	// Empty: the constraint lookup (m_source.GetTranslationId(), then
+	// staticData.GetConstrainingPhrase(sentenceID)) is left to subclass constructors; SearchMulti does it.
+}
+
+/**
+ * Factory: builds the Search implementation selected by searchAlgorithm.
+ * \param sources input sentences; Multi reads entries 0 and 1, Normal and CubePruning read entry 0
+ * \param transOptColls translation option collections, indexed in parallel with sources
+ * \return new Search owned by the caller; NULL for CubeGrowing, which is not built here
+ * Aborts on an unknown algorithm, or when a vector is missing or shorter than the algorithm needs.
+ */
+Search *Search::CreateSearch(std::vector< InputType const* > *sources, SearchAlgorithm searchAlgorithm, std::vector< TranslationOptionCollection* > *transOptColls)
+{
+	// Number of parallel inputs the selected algorithm dereferences below.
+	size_t numInputs = 0;
+	if (searchAlgorithm == Normal || searchAlgorithm == CubePruning) numInputs = 1;
+	else if (searchAlgorithm == Multi) numInputs = 2;
+	// operator[] is unchecked, so reject missing or short vectors before indexing.
+	if (numInputs > 0 && (sources == NULL || transOptColls == NULL || sources->size() < numInputs || transOptColls->size() < numInputs))
+	{
+		UserMessage::Add("ERROR: search. Too few inputs for the selected search algorithm. Aborting\n");
+		abort();
+	}
+
+	switch(searchAlgorithm)
+	{
+		case Normal:
+			return new SearchNormal(*((*sources)[0]), *((*transOptColls)[0]));
+		case CubePruning:
+			return new SearchCubePruning(*((*sources)[0]), *((*transOptColls)[0]));
+		case CubeGrowing:
+			return NULL;
+		case Multi:
+			return new SearchMulti(*((*sources)[0]), *((*sources)[1]), *((*transOptColls)[0]), *((*transOptColls)[1]));
+		default:
+			UserMessage::Add("ERROR: search. Aborting\n");
+			abort();
+			return NULL;
+	}
+}
+	
+}
+
+
diff --git a/moses/src/Search.h b/moses/src/Search.h
index 2d4d2c387..34ac61276 100644
--- a/moses/src/Search.h
+++ b/moses/src/Search.h
@@ -24,8 +24,10 @@ public:
 	{}
 
 	// Factory
-	static Search *CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl);
-
+	/** Builds the Search implementation selected by searchAlgorithm; Multi reads entries 0 and 1 of both
+	 *  vectors, Normal and CubePruning read entry 0. Returns NULL for CubeGrowing. The caller deletes the result. */
+	static Search *CreateSearch(std::vector< InputType const* > *sources, SearchAlgorithm searchAlgorithm, std::vector< TranslationOptionCollection* > *transOptColls);
+	
 protected:
 	
 	float m_WERLimit;
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index 847f72d7a..410aea068 100644
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -170,6 +170,7 @@ enum SearchAlgorithm
 	Normal				= 0
 	,CubePruning	= 1
 	,CubeGrowing	= 2
+	,Multi          = 3
 };
 
 // typedef
author	dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230>	2009-02-18 21:38:36 +0300
committer	dowobeha <dowobeha@1f5c12ca-751b-0410-a591-d2e778427230>	2009-02-18 21:38:36 +0300
commit	d52e3f332fbe134365f19363703c0c9bfdc732f5 (patch)
tree	28a2d87b000dc531f123455d98e498434b12ac1d
parent	7aff1789c99c6d10dc25bfb71c3e63281dd79857 (diff)