github.com/moses-smt/mosesdecoder.git
path: root/moses
author     phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>  2006-09-28 22:36:38 +0400
committer  phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>  2006-09-28 22:36:38 +0400
commit     3e7e4570b527282a12415ddf3a0908ebf2425dde (patch)
tree       86b711da04e9b3c7505e9479ee06d64fedab11dd /moses
parent     ef42ad791e432545add95e0b8a6a7bef80f52ba8 (diff)
bug fix to lexicalized reordering model
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@834 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x  moses/src/Hypothesis.cpp            36
-rwxr-xr-x  moses/src/Hypothesis.h              18
-rwxr-xr-x  moses/src/HypothesisCollection.cpp  14
-rwxr-xr-x  moses/src/HypothesisCollection.h    12
-rwxr-xr-x  moses/src/LexicalReordering.cpp    268
-rwxr-xr-x  moses/src/LexicalReordering.h       40
-rwxr-xr-x  moses/src/StaticData.cpp            54
-rwxr-xr-x  moses/src/StaticData.h               5
-rwxr-xr-x  moses/src/TypeDef.h                  2
-rwxr-xr-x  moses/src/WordsBitmap.cpp            2
-rwxr-xr-x  moses/src/WordsBitmap.h              2
11 files changed, 249 insertions(+), 204 deletions(-)
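
A minimal standalone sketch of the score lookup this commit introduces in LexicalReordering::CalcScore(): a table row now holds one probability per orientation and per direction (backward first), so the entry for a direction sits at index orientation + direction * numOrientationTypes. The helper name and parameters below are illustrative assumptions, not part of the Moses sources.

#include <cstddef>
#include <vector>

// Illustrative helper mirroring the indexing in the patched CalcScore().
// row:                 probabilities for one table entry, all directions concatenated
// orientation:         0 = monotone, 1 = swap / non-monotone, 2 = discontinuous
// numOrientationTypes: 2 for a monotone-only model, 3 for an msd model
// numScores:           number of trained feature weights for this reordering model
std::vector<float> LookupReorderingScores(const std::vector<float> &row,
                                          int orientation,
                                          std::size_t numDirections,
                                          std::size_t numOrientationTypes,
                                          std::size_t numScores)
{
    std::vector<float> score(numScores, 0.0f);
    for (std::size_t dir = 0; dir < numDirections; ++dir)
    {
        float value = row[orientation + dir * numOrientationTypes];
        if (numScores < numOrientationTypes)
            score[dir] = value;                                     // one weight per direction
        else
            score[orientation + dir * numOrientationTypes] = value; // one weight per direction and orientation type
    }
    return score;
}
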
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index 0365fa61d..0e946aa5f 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -55,7 +55,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
, _lmstats(0)
{ // used for initial seeding of trans process
// initialize scores
- _hash_computed = false;
+ //_hash_computed = false;
s_HypothesesCreated = 1;
ResetScore();
}
@@ -85,7 +85,7 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
// that this hypothesis has already translated!
assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
- _hash_computed = false;
+ //_hash_computed = false;
m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
m_wordDeleted = transOpt.IsDeletionOption();
m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
@@ -160,24 +160,36 @@ Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &em
return new(ptr) Hypothesis(m_source, emptyTarget);
}
-void Hypothesis::GenerateNGramCompareHash() const
-{
- _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
- _hash_computed = true;
- vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedReprentation();
- _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
-}
-
+//void Hypothesis::GenerateNGramCompareHash() const
+//{
+// _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
+// _hash_computed = true;
+// vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedRepresentation();
+// _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
+//}
+
+/** check, if two hypothesis can be recombined.
+ this is actually a sorting function that allows us to
+ keep an ordered list of hypotheses. This makes recombination
+ much quicker.
+*/
int Hypothesis::NGramCompare(const Hypothesis &compare) const
{ // -1 = this < compare
// +1 = this > compare
// 0 = this ==compare
if (m_languageModelStates < compare.m_languageModelStates) return -1;
if (m_languageModelStates > compare.m_languageModelStates) return 1;
+ if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1;
+ if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;
+ if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;
+ if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;
+ if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0;
+ if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;
+ if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;
return 0;
}
-/**
- * Calculates the overall language model score by combining the scores
+
+/** Calculates the overall language model score by combining the scores
* of language models generated for each of the factors. Because the factors
* represent a variety of tag sets, and because factors with smaller tag sets
* (such as POS instead of words) allow us to calculate richer statistics, we
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 56bc1a0fe..df1a423e0 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -83,9 +83,9 @@ protected:
void CalcDistortionScore();
//TODO: add appropriate arguments to score calculator
- void GenerateNGramCompareHash() const;
- mutable size_t _hash;
- mutable bool _hash_computed;
+ // void GenerateNGramCompareHash() const;
+ // mutable size_t _hash;
+ // mutable bool _hash_computed;
public:
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
@@ -205,12 +205,12 @@ public:
int NGramCompare(const Hypothesis &compare) const;
- inline size_t hash() const
- {
- if (_hash_computed) return _hash;
- GenerateNGramCompareHash();
- return _hash;
- }
+ // inline size_t hash() const
+ // {
+ // if (_hash_computed) return _hash;
+ // GenerateNGramCompareHash();
+ // return _hash;
+ // }
void ToStream(std::ostream& out) const
{
diff --git a/moses/src/HypothesisCollection.cpp b/moses/src/HypothesisCollection.cpp
index 4569b77e0..9012f22c6 100755
--- a/moses/src/HypothesisCollection.cpp
+++ b/moses/src/HypothesisCollection.cpp
@@ -102,7 +102,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
Hypothesis *hypoExisting = *iter;
if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
{ // incoming hypo is better than the one we have
- VERBOSE(3,"better than matching hyp, recombining, ");
+ VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
if (m_nBestIsEnabled) {
hypo->AddArc(hypoExisting);
Detach(iter);
@@ -114,7 +114,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
}
else
{ // already storing the best hypo. discard current hypo
- VERBOSE(3,"worse than matching hyp, recombining" << std::endl)
+ VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
if (m_nBestIsEnabled) {
(*iter)->AddArc(hypo);
} else {
@@ -183,6 +183,16 @@ void HypothesisCollection::PruneToSize(size_t newSize)
}
VERBOSE(3,", pruned to size " << size() << endl);
+ IFVERBOSE(3) {
+ cerr << "stack now contains: ";
+ for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
+ {
+ Hypothesis *hypo = *iter;
+ cerr << hypo->GetId() << " (" << hypo->GetTotalScore() << ") ";
+ }
+ cerr << endl;
+ }
+
// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
m_worstScore = scoreThreshold;
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
diff --git a/moses/src/HypothesisCollection.h b/moses/src/HypothesisCollection.h
index efd3ab257..236ce1599 100755
--- a/moses/src/HypothesisCollection.h
+++ b/moses/src/HypothesisCollection.h
@@ -66,12 +66,12 @@ struct HypothesisRecombinationComparer
}
};
-struct HypothesisRecombinationHasher
-{
- size_t operator()(const Hypothesis* hypo) const {
- return hypo->hash();
- }
-};
+//struct HypothesisRecombinationHasher
+//{
+// size_t operator()(const Hypothesis* hypo) const {
+// return hypo->hash();
+// }
+//};
/** Stack for instances of Hypothesis, includes functions for pruning. */
class HypothesisCollection
diff --git a/moses/src/LexicalReordering.cpp b/moses/src/LexicalReordering.cpp
index 9de9ab90c..bdef84786 100755
--- a/moses/src/LexicalReordering.cpp
+++ b/moses/src/LexicalReordering.cpp
@@ -7,45 +7,56 @@
#include <algorithm>
#include "LexicalReordering.h"
#include "InputFileStream.h"
-#include "DistortionOrientation.h"
#include "StaticData.h"
#include "Util.h"
using namespace std;
-/*
- * Load the file pointed to by filename; set up the table according to
- * the orientation and condition parameters. Direction will be used
- * later for computing the score.
- */
+/** Load the file pointed to by filename; set up the table according to
+ * the orientation and condition parameters. Direction will be used
+ * later for computing the score.
+ * \param filename file that contains the table
+ * \param orientation orientation as defined in DistortionOrientationType (monotone/msd)
+ * \param direction direction as defined in LexReorderType (forward/backward/bidirectional)
+ * \param condition either conditioned on foreign or foreign+english
+ * \param weights weight setting for this model
+ * \param input input factors
+ * \param output output factors
+ */
LexicalReordering::LexicalReordering(const std::string &filename,
int orientation, int direction,
int condition, const std::vector<float>& weights,
vector<FactorType> input, vector<FactorType> output) :
- m_orientation(orientation), m_condition(condition), m_numberscores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
+ m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
{
//add score producer
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
//manage the weights by SetWeightsForScoreProducer method of static data.
if(direction == LexReorderType::Bidirectional)
{
+ m_direction.push_back(LexReorderType::Backward); // this order is important
m_direction.push_back(LexReorderType::Forward);
- m_direction.push_back(LexReorderType::Backward);
}
else
{
m_direction.push_back(direction);
}
+ // set number of orientations
+ if( orientation == DistortionOrientationType::Monotone) {
+ m_numOrientationTypes = 2;
+ }
+ else if ( orientation == DistortionOrientationType::Msd) {
+ m_numOrientationTypes = 3;
+ }
const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights);
// Load the file
LoadFile();
-// PrintTable();
+ // PrintTable();
}
-/*
- * Loads the file into a map.
- */
+/** Loads the orientation file into a map
+ */
void LexicalReordering::LoadFile()
{
InputFileStream inFile(m_filename);
@@ -73,14 +84,11 @@ void LexicalReordering::LoadFile()
probs = Scan<float>(Tokenize(tokens[F_PROBS]));
}
- if (m_orientation == DistortionOrientationType::Monotone)
- {
- assert(probs.size() == MONO_NUM_PROBS); // 2 backward, 2 forward
- }
- else
- {
- assert(probs.size() == MSD_NUM_PROBS); // 3 backward, 3 forward
- }
+ if (probs.size() != m_direction.size() * m_numOrientationTypes) {
+ TRACE_ERR("found " << probs.size() << " probabilities, expected "
+ << m_direction.size() * m_numOrientationTypes << endl);
+ exit(0);
+ }
std::vector<float> scv(probs.size());
std::transform(probs.begin(),probs.end(),probs.begin(),TransformScore);
m_orientation_table[key] = probs;
@@ -88,9 +96,8 @@ void LexicalReordering::LoadFile()
inFile.Close();
}
-/*
- * Print the table in a readable format.
- */
+/** print the table in a readable format (not used at this point)
+ */
void LexicalReordering::PrintTable()
{
// iterate over map
@@ -113,140 +120,141 @@ void LexicalReordering::PrintTable()
}
}
-std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
+/** compute the orientation given a hypothesis
+ */
+int LexicalReordering::GetOrientation(const Hypothesis *curr_hypothesis)
{
- std::vector<float> score(m_numberscores, 0);
- vector<float> val;
- for(unsigned int i=0; i < m_direction.size(); i++)
+ const Hypothesis *prevHypo = curr_hypothesis->GetPrevHypo();
+
+ const WordsRange &currSourceRange = curr_hypothesis->GetCurrSourceWordsRange();
+ size_t curr_source_start = currSourceRange.GetStartPos();
+ size_t curr_source_end = currSourceRange.GetEndPos();
+
+ //if there's no previous source...
+ if(prevHypo->GetId() == 0){
+ if (curr_source_start == 0)
+ {
+ return ORIENTATION_MONOTONE;
+ }
+ else {
+ return ORIENTATION_DISCONTINUOUS;
+ }
+ }
+
+
+ const WordsRange &prevSourceRange = prevHypo->GetCurrSourceWordsRange();
+ size_t prev_source_start = prevSourceRange.GetStartPos();
+ size_t prev_source_end = prevSourceRange.GetEndPos();
+ if(prev_source_end==curr_source_start-1)
{
- int direction = m_direction[i];
- int orientation = DistortionOrientation::GetOrientation(hypothesis, direction);
- if(m_condition==LexReorderType::Fe)
+ return ORIENTATION_MONOTONE;
+ }
+ // distinguish between monotone, swap, discontinuous
+ else if(m_orientation==DistortionOrientationType::Msd)
+ {
+ if(prev_source_start==curr_source_end+1)
{
- //this key string is F+'|||'+E from the hypothesis
- val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
- +"||| "
- +hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
+ return ORIENTATION_SWAP;
}
else
{
- //this key string is F from the hypothesis
- val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
+ return ORIENTATION_DISCONTINUOUS;
}
- if(val.size()> 0)
- {
- if(m_orientation==DistortionOrientationType::Msd)
- {
- if(direction==LexReorderType::Backward)
- {
- if(orientation==DistortionOrientationType::MONO)
- {
- score[BACK_M] = val[BACK_M];
- }
- else if(orientation==DistortionOrientationType::SWAP)
- {
- score[BACK_S] = val[BACK_S];
- }
- else
- {
- score[BACK_D] = val[BACK_D];
- }
-
- }
- else
- {
- //if we only have forward scores (no backward scores) in the table,
- //then forward scores have no offset so we can use the indices of the backwards scores
- if(orientation==DistortionOrientationType::MONO)
- {
- if(m_numberscores>3)
- {
- score[FOR_M] = val[FOR_M];
- }
- else
- {
- score[BACK_M] = val[BACK_M];
- }
- }
- else if(orientation==DistortionOrientationType::SWAP)
+ }
+ // only distinguish between monotone, non monotone
+ else
+ {
+ return ORIENTATION_NON_MONOTONE;
+ }
+}
+
+/** calculate the score(s) for a hypothesis
+ */
+std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
+{
+ std::vector<float> score(m_numScores, 0);
+ for(unsigned int i=0; i < m_direction.size(); i++) // backward, forward, or both
+ {
+ vector<float> val; // we will score the matching probability here
+
+ // FIRST, get probability distribution
+
+ int direction = m_direction[i]; // either backward or forward
+
+ // no score, if we would have to compute the forward score from the initial hypothesis
+ if (direction == LexReorderType::Backward || hypothesis->GetPrevHypo()->GetId() != 0) {
+
+ if (direction == LexReorderType::Backward) {
+ // conditioned on both foreign and English
+ if(m_condition==LexReorderType::Fe)
{
- if(m_numberscores>3)
- {
- score[FOR_S] = val[FOR_S];
- }
- else
- {
- score[BACK_S] = val[BACK_S];
- }
+ //this key string is F+'|||'+E from the hypothesis
+ val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
+ +"||| "
+ +hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
}
- else
+ // only conditioned on foreign
+ else
{
- if(m_numberscores>3)
- {
- score[FOR_D] = val[FOR_D];
- }
- else
- {
- score[BACK_D] = val[BACK_D];
- }
+ //this key string is F from the hypothesis
+ val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
}
- }
}
- else
- {
- if(direction==LexReorderType::Backward)
- {
- if(orientation==DistortionOrientationType::MONO)
- {
- score[BACK_MONO] = val[BACK_MONO];
- }
- else
- {
- score[BACK_NONMONO] = val[BACK_NONMONO];
- }
- }
- else
- {
- //if we only have forward scores (no backward scores) in the table,
- //then forward scores have no offset so we can use the indices of the backwards scores
- if(orientation==DistortionOrientationType::MONO)
+
+ // if forward looking, condition on previous phrase
+ else {
+ // conditioned on both foreign and English
+ if(m_condition==LexReorderType::Fe)
{
- if(m_numberscores>3)
- {
- score[FOR_MONO] = val[FOR_MONO];
- }
- else
- {
- score[BACK_MONO] = val[BACK_MONO];
- }
+ //this key string is F+'|||'+E from the hypothesis
+ val=m_orientation_table[hypothesis->GetPrevHypo()->GetSourcePhraseStringRep(m_sourceFactors)
+ +"||| "
+ +hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_targetFactors)];
}
- else
+ // only conditioned on foreign
+ else
{
- if(m_numberscores>3)
- {
- score[FOR_NONMONO] = val[FOR_NONMONO];
- }
- else
- {
- score[BACK_NONMONO] = val[BACK_NONMONO];
- }
+ //this key string is F from the hypothesis
+ val=m_orientation_table[hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_sourceFactors)];
}
- }
}
-
+ }
+
+ // SECOND, look up score
+
+ if(val.size()> 0) // valid entry
+ {
+ int orientation = GetOrientation(hypothesis);
+ float value = val[ orientation + i * m_numOrientationTypes ];
+ // one weight per direction
+ if ( m_numScores < m_numOrientationTypes ) {
+ score[i] = value;
+ }
+ // one weight per direction and type
+ else {
+ score[ orientation + i * m_numOrientationTypes ] = value;
+ }
+
+ // IFVERBOSE(3) {
+ // cerr << "\tdistortion type " << orientation << " =>";
+ // for(unsigned int j=0;j<score.size();j++) {
+ // cerr << " " << score[j];
+ // }
+ // cerr << endl;
+ // }
}
}
return score;
}
-
+/** return the number of scores produced by this model */
unsigned int LexicalReordering::GetNumScoreComponents() const
{
- return m_numberscores;
+ return m_numScores;
}
+/** returns description of the model */
const std::string LexicalReordering::GetScoreProducerDescription() const
{
return "Lexicalized reordering score, file=" + m_filename;
}
-
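
A minimal sketch of the span-based test performed by the new LexicalReordering::GetOrientation() above; the enum and function names are hypothetical stand-ins for the ORIENTATION_* constants added in LexicalReordering.h.

#include <cstddef>

// Hypothetical stand-ins for ORIENTATION_MONOTONE / _SWAP / _NON_MONOTONE / _DISCONTINUOUS.
enum Orientation { Monotone = 0, Swap = 1, NonMonotone = 1, Discontinuous = 2 };

// msdModel == true distinguishes monotone / swap / discontinuous,
// msdModel == false only monotone vs. non-monotone.
Orientation ClassifyOrientation(std::size_t prevStart, std::size_t prevEnd,
                                std::size_t currStart, std::size_t currEnd,
                                bool prevIsInitial, bool msdModel)
{
    if (prevIsInitial)                 // no source phrase translated yet
        return currStart == 0 ? Monotone : Discontinuous;
    if (prevEnd + 1 == currStart)      // current phrase directly follows the previous one
        return Monotone;
    if (!msdModel)
        return NonMonotone;
    if (prevStart == currEnd + 1)      // current phrase directly precedes the previous one
        return Swap;
    return Discontinuous;
}
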
diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h
index 8bab36b99..329c5ec83 100755
--- a/moses/src/LexicalReordering.h
+++ b/moses/src/LexicalReordering.h
@@ -37,11 +37,11 @@ class Phrase;
class Hypothesis;
using namespace std;
-/***
- * The LexicalReordering class handles everything involved with
+/** The LexicalReordering class handles everything involved with
* lexical reordering. It loads a probability table P(orientation|f,e)
* and computes scores in either forward, backward, or bidirectional
* direction.
+ * This model is described in Koehn et al. [IWSLT 2005]
*/
class LexicalReordering : public ScoreProducer
@@ -49,14 +49,9 @@ class LexicalReordering : public ScoreProducer
private:
- // Members
+ // This stores the model table
typedef std::map<std::string, std::vector<float> > ORIENTATION_TABLE;
- // This is the order in which the different forward/backward
- // probabilities are stored in the table.
- enum TableLookupMsd { BACK_M, BACK_S, BACK_D, FOR_M,FOR_S, FOR_D };
- enum TableLookupMonotone { BACK_MONO, BACK_NONMONO, FOR_MONO, FOR_NONMONO};
-
// This is the order in which pieces appear in the orientation table
// when conditioning on f and e.
enum FEFileFormat { FE_FOREIGN, FE_ENGLISH, FE_PROBS };
@@ -67,19 +62,25 @@ private:
// different numbers of probabilities for different ranges of
// orientation variable
- static const unsigned int MSD_NUM_PROBS = 6;
- static const unsigned int MONO_NUM_PROBS = 4;
+ static const unsigned int MSD_NUM_PROBS = 3;
+ static const unsigned int MONO_NUM_PROBS = 2;
+
+ static const unsigned int ORIENTATION_MONOTONE = 0;
+ static const unsigned int ORIENTATION_NON_MONOTONE = 1;
+ static const unsigned int ORIENTATION_SWAP = 1;
+ static const unsigned int ORIENTATION_DISCONTINUOUS = 2;
- int m_orientation; // msd or monotone
- std::vector<int> m_direction; // contains forward, backward, or both (bidirectional)
- int m_condition; // fe or f
- int m_numberscores; //2, 3, 4 or 6
- std::string m_filename; // probability table location
- vector<FactorType> m_sourceFactors;
- vector<FactorType> m_targetFactors;
+ int m_orientation; /**< msd or monotone */
+ std::vector<int> m_direction; /**< contains forward, backward, or both (bidirectional) */
+ int m_condition; /**< fe or f */
+ int m_numScores; /**< 1, 2, 3, or 6 */
+ int m_numOrientationTypes; /**< 2(mono) or 3(msd) */
+ std::string m_filename; /**< probability table location */
+ vector<FactorType> m_sourceFactors; /**< source factors to condition on */
+ vector<FactorType> m_targetFactors; /**< target factors to condition on */
- ORIENTATION_TABLE m_orientation_table; // probability table
+ ORIENTATION_TABLE m_orientation_table; /**< probability table */
// Functions
void LoadFile(void);
@@ -95,6 +96,9 @@ public:
// Descructor
~LexicalReordering(void) {}
+ // Compute Orientation
+ int GetOrientation(const Hypothesis *curr_hypothesis);
+
// Compute and return a score for a hypothesis
std::vector<float> CalcScore(Hypothesis *curr_hypothesis);
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 813c6aef2..3b70e7fa4 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -186,36 +186,35 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// load Lexical Reordering model
const vector<string> &lrFileVector =
m_parameter.GetParam("distortion-file");
+
for(unsigned int i=0; i< lrFileVector.size(); i++ ) //loops for each distortion model
{
-
- //if this went wrong, something went wrong in the parsing.
- const vector<string> &lrTypeVector = m_parameter.GetParam("distortion");
+ vector<string> specification = Tokenize<string>(lrFileVector[i]," ");
+ if (specification.size() != 4 )
+ {
+ TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl);
+ return false;
+ }
+
//defaults, but at least one of these per model should be explicitly specified in the .ini file
int orientation = DistortionOrientationType::Msd,
direction = LexReorderType::Bidirectional,
condition = LexReorderType::Fe;
- if(lrTypeVector.size() < i)
- {
- std::cerr<<"ERROR: please specify one line of configuration under [distortion] per distortion model in the moses configuration file\n";
- abort();
- }
-
//Loop through, overriding defaults with specifications
- vector<string> parameters = Tokenize<string>(lrTypeVector[i],"-");
+ vector<string> parameters = Tokenize<string>(specification[1],"-");
for (size_t param=0; param<parameters.size(); param++)
{
string val = ToLower(parameters[param]);
//orientation
- if(val == "monotone")
+ if(val == "monotone" || val == "monotonicity")
orientation = DistortionOrientationType::Monotone;
- else if(val == "msd")
+ else if(val == "msd" || val == "orientation")
orientation = DistortionOrientationType::Msd;
//direction
else if(val == "forward")
direction = LexReorderType::Forward;
- else if(val == "backward")
+ else if(val == "backward" || val == "unidirectional")
direction = LexReorderType::Backward;
else if(val == "bidirectional")
direction = LexReorderType::Bidirectional;
@@ -224,7 +223,10 @@ bool StaticData::LoadParameters(int argc, char* argv[])
condition = LexReorderType::F;
else if(val == "fe")
condition = LexReorderType::Fe;
+ if (orientation == DistortionOrientationType::Msd)
+ m_sourceStartPosMattersForRecombination = true;
}
+
//compute the number of weights that ought to be in the table from this
size_t numWeightsInTable = 0;
if(orientation == DistortionOrientationType::Monotone)
@@ -239,10 +241,17 @@ bool StaticData::LoadParameters(int argc, char* argv[])
{
numWeightsInTable *= 2;
}
-
- vector<string> token = Tokenize(lrFileVector[i]);
- //characteristics of the phrase table
- vector<string> inputfactors = Tokenize(token[0],"-");
+ size_t specifiedNumWeights = Scan<size_t>(specification[2]);
+ if (specifiedNumWeights != numWeightsInTable) {
+ std::cerr << "specified number of weights ("
+ << specifiedNumWeights
+ << ") does not match correct number of weights for this type ("
+ << numWeightsInTable << std::endl;
+ abort();
+ }
+
+ //factors involved in this table
+ vector<string> inputfactors = Tokenize(specification[0],"-");
vector<FactorType> input,output;
if(inputfactors.size() > 1)
{
@@ -254,11 +263,9 @@ bool StaticData::LoadParameters(int argc, char* argv[])
input.push_back(0); // default, just in case the user is actually using a bidirectional model
output = Tokenize<FactorType>(inputfactors[0],",");
}
- size_t numWeights = Scan<size_t>(token[1]);
- std::string filePath= token[2];
std::vector<float> m_lexWeights; //will store the weights for this particular distortion reorderer
std::vector<float> newLexWeights; //we'll remove the weights used by this distortion reorder, leaving the weights yet to be used
- if(numWeights == 1) // this is useful if the user just wants to train one weight for the model
+ if(specifiedNumWeights == 1) // this is useful if the user just wants to train one weight for the model
{
//add appropriate weight to weight vector
assert(distortionModelWeights.size()> 0); //if this fails the user has not specified enough weights
@@ -298,10 +305,11 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// TRACE_ERR(m_lexWeights[weight] << "\t");
//}
//TRACE_ERR(endl);
- timer.check("Starting to load lexical reorder table...");
- TRACE_ERR(filePath << "...");
+
+ // loading the file
+ std::string filePath= specification[3];
+ timer.check(("Start loading distortion table " + filePath).c_str());
m_reorderModels.push_back(new LexicalReordering(filePath, orientation, direction, condition, m_lexWeights, input, output));
- // timer.check("Finished loading lexical reorder table.");
}
if (m_parameter.GetParam("lmodel-file").size() > 0)
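
A minimal, self-contained sketch of how the reworked [distortion-file] specification parsed above is structured: four whitespace-separated fields, "factors type weight-count filename", with the type field itself dash-separated into orientation, direction, and condition. The example line and file path are made up for illustration and do not come from the Moses sources.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    // hypothetical example of one distortion-file specification line
    std::string spec = "0-0 msd-bidirectional-fe 6 /some/path/reordering-table";

    // split into the four expected fields: factors, type, weight count, filename
    std::istringstream in(spec);
    std::vector<std::string> fields;
    for (std::string tok; in >> tok; )
        fields.push_back(tok);
    if (fields.size() != 4)
    {
        std::cerr << "expected 'factors type weight-count filename'" << std::endl;
        return 1;
    }

    // split the type field on '-' into orientation / direction / condition tokens
    std::vector<std::string> type;
    std::istringstream typeStream(fields[1]);
    for (std::string tok; std::getline(typeStream, tok, '-'); )
        type.push_back(tok);

    std::cout << "factors=" << fields[0]
              << " orientation=" << type[0]
              << " direction=" << type[1]
              << " condition=" << type[2]
              << " weights=" << fields[2]
              << " file=" << fields[3] << std::endl;
    return 0;
}
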
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 38cf1a525..d06f9cf5e 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -87,6 +87,7 @@ protected:
bool m_dropUnknown;
bool m_wordDeletionEnabled;
+ bool m_sourceStartPosMattersForRecombination;
int m_inputType;
unsigned m_numInputScores;
@@ -156,6 +157,10 @@ public:
return m_decodeStepList;
}
+ inline bool GetSourceStartPosMattersForRecombination() const
+ {
+ return m_sourceStartPosMattersForRecombination;
+ }
inline bool GetDropUnknown() const
{
return m_dropUnknown;
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index b67724369..4a744ffda 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -111,8 +111,6 @@ namespace DistortionOrientationType
Monotone, //distinguish only between monotone and non-monotone as possible orientations
Msd //further separate non-monotone into swapped and discontinuous
};
- // Possible values for orientation.
- enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC }; //TODO explain values
}
enum IOMethod
diff --git a/moses/src/WordsBitmap.cpp b/moses/src/WordsBitmap.cpp
index 1b12aa487..9d0e3b09b 100755
--- a/moses/src/WordsBitmap.cpp
+++ b/moses/src/WordsBitmap.cpp
@@ -58,7 +58,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
}
-std::vector<size_t> WordsBitmap::GetCompressedReprentation() const
+std::vector<size_t> WordsBitmap::GetCompressedRepresentation() const
{
std::vector<size_t> res(1 + (m_size >> (sizeof(int) + 3)), 0);
size_t c=0; size_t x=0; size_t ci=0;
diff --git a/moses/src/WordsBitmap.h b/moses/src/WordsBitmap.h
index b98df1a1b..af6429fe0 100755
--- a/moses/src/WordsBitmap.h
+++ b/moses/src/WordsBitmap.h
@@ -140,7 +140,7 @@ public:
return m_size;
}
- std::vector<size_t> GetCompressedReprentation() const;
+ std::vector<size_t> GetCompressedRepresentation() const;
inline int Compare (const WordsBitmap &compare) const
{