added unknown-word handling code (currently commented out) and source-word deletion (also currently commented out)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@176 1f5c12ca-751b-0410-a591-d2e778427230
author: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 22:13:45 +0400
committer: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 22:13:45 +0400
commit: 5c5d971895c177be55cf6b43acd3c827915f2bb4 (patch)
tree: 7a8813b76e5921c381b64a2c8feabe2f72e5878d /moses
parent: 7beabbd9ed39a52fcfe1d6dbd0567cf1ab1b4768 (diff)
25 files changed, 760 insertions, 268 deletions
diff --git a/moses/src/Arc.cpp b/moses/src/Arc.cpp
index fbc7c0d0c..2b43d90b2 100755
--- a/moses/src/Arc.cpp
+++ b/moses/src/Arc.cpp
@@ -42,7 +42,7 @@ std::ostream& operator<<(std::ostream& out, const Arc& arc)
 	{
 		out << *prevHypo;
 	}
-	out << arc.GetPhrase();
+	out << arc.GetTargetPhrase();
 
 	// score
 	out << " [" << arc.GetScore( static_cast<ScoreType::ScoreType>(0));
diff --git a/moses/src/Arc.h b/moses/src/Arc.h
index 69c4c18b2..9c4762274 100755
--- a/moses/src/Arc.h
+++ b/moses/src/Arc.h
@@ -33,7 +33,7 @@ public:
 
 	Arc(const Arc &arc); // not implemented
 	
-	Arc( const float score[NUM_SCORES]
+	Arc( const float score[]
 			, const ScoreComponentCollection 	&transScoreComponent
 			, const ScoreColl					 						&lmScoreComponent
 			, const ScoreColl											&generationScoreColl
diff --git a/moses/src/DeletionHypothesis.cpp b/moses/src/DeletionHypothesis.cpp
new file mode 100644
index 000000000..88bcacb99
--- /dev/null
+++ b/moses/src/DeletionHypothesis.cpp
@@ -0,0 +1,69 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "DeletionHypothesis.h"
+
+
+
+/***
+ * store in m_score[ScoreType::DeletedWords] the cost that wordDeletionTable reports for the part of sourceSentence covered by sourceWordsRange
+ */
+void DeletionHypothesis::CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable)
+{
+	m_score[ScoreType::DeletedWords] = wordDeletionTable.GetDeletionCost(sourceSentence.GetSubString(sourceWordsRange));
+}
+
+/***
+ * Set m_score[Total] to the sum of the individual score parts (not necessarily using all of them);
+ * distortion, word penalty and deleted-word scores are scaled by their StaticData weights, as in Hypothesis::CalcScore
+ */
+void DeletionHypothesis::SumIndividualScores(const StaticData& staticData)
+{
+	m_score[ScoreType::Total] = m_score[ScoreType::PhraseTrans]
+								+ m_score[ScoreType::Generation]			
+								+ m_score[ScoreType::LanguageModelScore]
+								+ m_score[ScoreType::Distortion]					* staticData.GetWeightDistortion()
+								+ m_score[ScoreType::WordPenalty]				* staticData.GetWeightWordPenalty()
+								+ m_score[ScoreType::DeletedWords]				* staticData.GetWordDeletionWeight()
+								+ m_score[ScoreType::FutureScoreEnum];
+}
+
+void DeletionHypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source)
+{ // update the individual entries of m_score, then combine them via SumIndividualScores
+	// DISTORTION COST
+	CalcDistortionScore();
+	
+	// LANGUAGE MODEL COST, using the Initial and Other language-model lists from staticData
+	CalcLMScore(staticData.GetLanguageModel(Initial), staticData.GetLanguageModel(Other));
+
+	// WORD PENALTY: minus the value of GetSize()
+	m_score[ScoreType::WordPenalty] = - (float) GetSize();
+
+	// FUTURE COST
+	CalcFutureScore(futureScore);
+	
+	//cost for deleting source words (the call below is currently commented out, so DeletedWords is not recomputed here)
+//	CalcDeletionScore(source, GetCurrSourceWordsRange(), staticData.GetWordDeletionTable());
+	
+	//LEXICAL REORDERING COST (NOTE(review): SumIndividualScores adds no LexicalReordering term -- confirm intended)
+	CalcLexicalReorderingScore();
+
+	// TOTAL COST: written to m_score[ScoreType::Total]
+	SumIndividualScores(staticData);
+}
diff --git a/moses/src/DeletionHypothesis.h b/moses/src/DeletionHypothesis.h
new file mode 100644
index 000000000..f6df1f64b
--- /dev/null
+++ b/moses/src/DeletionHypothesis.h
@@ -0,0 +1,58 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include "Sentence.h"
+#include "WordsBitmap.h"
+#include "WordsRange.h"
+#include "WordDeletionTable.h"
+#include "StaticData.h"
+#include "Hypothesis.h"
+#include "TranslationOption.h"
+
+/***
+ * A hypothesis extension that translates a source phrase to the empty phrase
+ * (i.e. drops the source words); overrides the score calculation of Hypothesis
+ */
+class DeletionHypothesis : public Hypothesis
+{
+	friend class Hypothesis; //for the factory functions (Hypothesis::Create), which need the protected constructors
+	
+	protected:
+		// both constructors forward to the matching Hypothesis constructor; the first passes an empty Phrase
+		DeletionHypothesis(const WordsBitmap &initialCoverage) : Hypothesis(Phrase(), initialCoverage) {}
+		DeletionHypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt) : Hypothesis(prevHypo, transOpt) {}
+		virtual ~DeletionHypothesis() {}
+	
+		/***
+		 * store in m_score[ScoreType::DeletedWords] the deletion cost of the source words in sourceWordsRange
+		 */
+		void CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable);
+		
+		/***
+		 * Set the total-score field from the various individual score parts
+		 * (not necessarily using all of them)
+		 */
+		virtual void SumIndividualScores(const StaticData& staticData);
+	
+	public:
+		// update the individual scores, then call SumIndividualScores for the total
+		virtual void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source);
+};
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index 77a715a1d..d70800d9b 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -28,6 +28,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "Util.h"
 #include "Arc.h"
 #include "SquareMatrix.h"
+#include "StaticData.h"
+//#include "DeletionHypothesis.h"
 //TODO: add this include in when it compiles
 //#include "LexicalReordering.h"
 
@@ -41,6 +43,7 @@ Hypothesis::Hypothesis(const Phrase &phrase, const WordsBitmap &initialCoverage)
 	, m_sourceCompleted(initialCoverage)
 	, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
 	, m_currTargetWordsRange(NOT_FOUND, NOT_FOUND)
+	, m_wordDeleted(false)
 	, m_id(s_numNodes++)
 {	// used for initial seeding of trans process	
 	// initialize scores
@@ -53,9 +56,10 @@ Hypothesis::Hypothesis(const Hypothesis &copy)
 	, m_sourceCompleted				(copy.m_sourceCompleted )
 	, m_currSourceWordsRange	(copy.m_currSourceWordsRange)
 	, m_currTargetWordsRange		(copy.m_currTargetWordsRange)
+	, m_wordDeleted(copy.m_wordDeleted) // a copy keeps the deletion flag of the hypothesis it was made from
 	, m_id(s_numNodes++)
 {
-	m_phrase.AddWords( copy.m_phrase );
+	m_targetPhrase.AddWords( copy.m_targetPhrase );
 
 	// initialize scores
 	SetScore(copy.GetScore());
@@ -67,12 +71,16 @@ Hypothesis::Hypothesis(const Hypothesis &copy)
 #endif
 }
 
+/***
+ * continue prevHypo by appending the phrases in transOpt
+ */
 Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
 	: LatticeEdge							(Output, &prevHypo)
 	, m_sourceCompleted				(prevHypo.m_sourceCompleted )
 	, m_currSourceWordsRange	(prevHypo.m_currSourceWordsRange)
 	, m_currTargetWordsRange		( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
 														 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetPhrase().GetSize())
+	, m_wordDeleted(false)
 	, m_id(s_numNodes++)
 {
 	const Phrase &possPhrase				= transOpt.GetPhrase();
@@ -81,24 +89,24 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
 	m_sourceCompleted.SetValue(wordsRange.GetStartPos(), wordsRange.GetEndPos(), true);
 	// add new words from poss trans
 	//m_phrase.AddWords(prev.m_phrase);
-	m_phrase.AddWords(possPhrase);
+	m_targetPhrase.AddWords(possPhrase);
 
 	// scores
 	SetScore(prevHypo.GetScore());
 	m_score[ScoreType::PhraseTrans]				+= transOpt.GetTranslationScore();
 	m_score[ScoreType::FutureScoreEnum]		+= transOpt.GetFutureScore();
 	m_score[ScoreType::LanguageModelScore]	+= transOpt.GetNgramScore();
+//  m_wordDeleted = transOpt.IsDeletionOption();
 
 #ifdef N_BEST
 	// language model score (ngram)
 	m_lmScoreComponent = prevHypo.GetLMScoreComponent();
-	const list< pair<size_t, float> > &nGramComponent = transOpt.GetTrigramComponent();
+	const std::vector< std::pair<size_t, float> > &nGramComponent = transOpt.GetTrigramComponent();
 
-	list< pair<size_t, float> >::const_iterator iter;
-	for (iter = nGramComponent.begin() ; iter != nGramComponent.end() ; ++iter)
+	for(unsigned int i = 0; i < nGramComponent.size(); i++)
 	{
-		size_t lmId = (*iter).first;
-		float score	= (*iter).second;
+		size_t lmId = nGramComponent[i].first;
+		float score	= nGramComponent[i].second;
 		m_lmScoreComponent[lmId] += score;
 	}
 
@@ -132,16 +140,40 @@ Hypothesis::~Hypothesis()
 #endif
 }
 
-Hypothesis *Hypothesis::CreateNext(const TranslationOption &transOpt) const
+/***
+ * return a newly allocated hypothesis that extends this one with transOpt (forwards to Create(*this, transOpt))
+ */
+Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const
 {
-	Hypothesis *clone	= new Hypothesis(*this, transOpt);
-	return clone;
+	return Create(*this, transOpt);
 }
 
+/***
+ * allocate the hypothesis that extends prevHypo with transOpt; always a plain Hypothesis for now, because the DeletionHypothesis branch is commented out
+ */
+Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
+{
+	/*if(s_wordDeletionEnabled && transOpt.GetPhrase().GetSize() == 0) return new DeletionHypothesis(prevHypo, transOpt);
+	else*/ return new Hypothesis(prevHypo, transOpt);
+}
 
+/***
+ * allocate a hypothesis from targetPhrase and initialCoverage; always a plain Hypothesis for now, because the DeletionHypothesis branch is commented out
+ */
+Hypothesis* Hypothesis::Create(const Phrase& targetPhrase, const WordsBitmap &initialCoverage)
+{
+	/*if(s_wordDeletionEnabled && targetPhrase.GetSize() == 0) return new DeletionHypothesis(initialCoverage);
+	else*/ return new Hypothesis(targetPhrase, initialCoverage);
+}
 
-
-Hypothesis *Hypothesis::MergeNext(const TranslationOption &transOpt) const
+/***
+ * if any factors aren't set in our target phrase but are present in transOpt, copy them over
+ * (unless the factors that we do have fail to match the corresponding ones in transOpt,
+ *  in which case presumably there's a programmer's error)
+ * 
+ * return NULL if we aren't compatible with the given option
+ */
+Hypothesis* Hypothesis::MergeNext(const TranslationOption &transOpt) const
 {
 	// check each word is compatible and merge 1-by-1
 	const Phrase &possPhrase = transOpt.GetPhrase();
@@ -151,7 +183,7 @@ Hypothesis *Hypothesis::MergeNext(const TranslationOption &transOpt) const
 	}
 
 	// ok, merge
-	Hypothesis *clone				= new Hypothesis(*this);
+	Hypothesis* clone = new Hypothesis(*this);
 
 	int currWord = 0;
 	size_t len = GetSize();
@@ -276,10 +308,6 @@ void Hypothesis::CalcLexicalReorderingScore()
 //	  	LatticeEdge.getPrevHypo());     //Previous Hypothesis
 }
 
-
-
-
-
 /**
  * Calculates the overall language model score by combining the scores
  * of language models generated for each of the factors.  Because the factors
@@ -289,7 +317,6 @@ void Hypothesis::CalcLexicalReorderingScore()
  * /param lmListInitial todo - describe this parameter 
  * /param lmListEnd todo - describe this parameter
  */
-
 void Hypothesis::CalcLMScore(const LMList &lmListInitial, const LMList	&lmListEnd)
 {
 	const size_t startPos	= m_currTargetWordsRange.GetStartPos();
@@ -316,8 +343,6 @@ void Hypothesis::CalcLMScore(const LMList &lmListInitial, const LMList	&lmListEn
 		}		
 		lmScore	= languageModel.GetValue(contextFactor);
 		//cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-		
-		
 
 		// main loop
 		for (size_t currPos = startPos + 1 ; currPos <= m_currTargetWordsRange.GetEndPos() ; currPos++)
@@ -361,53 +386,56 @@ void Hypothesis::CalcLMScore(const LMList &lmListInitial, const LMList	&lmListEn
 		size_t nGramOrder			= languageModel.GetNGramOrder();
 		float lmScore;
 
-		// 1st n-gram
-		vector<const Factor*> contextFactor(nGramOrder);
-		size_t index = 0;
-		for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
-		{
-			if (currPos >= 0)
-				contextFactor[index++] = GetFactor(currPos, factorType);
-			else			
-				contextFactor[index++] = languageModel.GetSentenceStart();
-		}
-		lmScore	= languageModel.GetValue(contextFactor);
-		//cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-		
-
-		// main loop
-		size_t endPos = std::min(startPos + nGramOrder - 2
-														, m_currTargetWordsRange.GetEndPos());
-		for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)
+		if(m_currTargetWordsRange.GetWordsCount() > 0) //non-empty target phrase
 		{
-			// shift all args down 1 place
-			for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
-				contextFactor[i] = contextFactor[i + 1];
-
-			// add last factor
-			contextFactor.back() = GetFactor(currPos, factorType);
-
-			lmScore	+= languageModel.GetValue(contextFactor);
+			// 1st n-gram
+			vector<const Factor*> contextFactor(nGramOrder);
+			size_t index = 0;
+			for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
+			{
+				if (currPos >= 0)
+					contextFactor[index++] = GetFactor(currPos, factorType);
+				else			
+					contextFactor[index++] = languageModel.GetSentenceStart();
+			}
+			lmScore	= languageModel.GetValue(contextFactor);
 			//cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;
-		
-		}
 
-		// end of sentence
-		if (m_sourceCompleted.IsComplete())
-		{
-			const size_t size = GetSize();
-			contextFactor.back() = languageModel.GetSentenceEnd();
+			// main loop
+			size_t endPos = std::min(startPos + nGramOrder - 2
+															, m_currTargetWordsRange.GetEndPos());
+			for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++)
+			{
+				// shift all args down 1 place
+				for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
+					contextFactor[i] = contextFactor[i + 1];
+	
+				// add last factor
+				contextFactor.back() = GetFactor(currPos, factorType);
+	
+				lmScore	+= languageModel.GetValue(contextFactor);
+				//cout<<"context factor: "<<languageModel.GetValue(contextFactor)<<endl;		
+			}
 
-			for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
+			// end of sentence
+			if (m_sourceCompleted.IsComplete())
 			{
-				int currPos = size - nGramOrder + i + 1;
-				if (currPos < 0)
-					contextFactor[i] = languageModel.GetSentenceStart();
-				else
-					contextFactor[i] = GetFactor((size_t)currPos, factorType);
+				const size_t size = GetSize();
+				contextFactor.back() = languageModel.GetSentenceEnd();
+	
+				for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
+				{
+					int currPos = size - nGramOrder + i + 1;
+					if (currPos < 0)
+						contextFactor[i] = languageModel.GetSentenceStart();
+					else
+						contextFactor[i] = GetFactor((size_t)currPos, factorType);
+				}
+				lmScore	+= languageModel.GetValue(contextFactor);
 			}
-			lmScore	+= languageModel.GetValue(contextFactor);
 		}
+		else lmScore = 0; //the score associated with dropping source words is not part of the language model
+		
 		m_score[ScoreType::LanguageModelScore] += lmScore * languageModel.GetWeight();
 #ifdef N_BEST
 		size_t lmId = languageModel.GetId();
@@ -416,14 +444,8 @@ void Hypothesis::CalcLMScore(const LMList &lmListInitial, const LMList	&lmListEn
 	}
 }
 
-void Hypothesis::CalcScore(const LMList		&lmListInitial
-													, const LMList	&lmListEnd
-													, float weightDistortion
-													, float weightWordPenalty
-													, const SquareMatrix &futureScore
-													, const Sentence &source) 
+void Hypothesis::CalcDistortionScore()
 {
-	// DISTORTION COST
 	const WordsRange &prevRange = m_prevHypo->GetCurrSourceWordsRange()
 								, &currRange	= GetCurrSourceWordsRange();
 				
@@ -436,28 +458,52 @@ void Hypothesis::CalcScore(const LMList		&lmListInitial
 		// distortions scores of all previous partial translations
 		m_score[ScoreType::Distortion]	-=  (float) currRange.CalcDistortion(prevRange) ;
 	}
+}
+
+/***
+ * store in m_score[ScoreType::DeletedWords] the cost that wordDeletionTable reports for the part of sourceSentence covered by sourceWordsRange
+ */
+void Hypothesis::CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable)
+{
+	m_score[ScoreType::DeletedWords] =
+		wordDeletionTable.GetDeletionCost(sourceSentence.GetSubString(sourceWordsRange));
+}
+
+
+/***
+ * calculate the logarithm of our total translation score: update the component scores, then sum them (with weights from staticData) into m_score[Total]
+ */
+void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source) 
+{
+	// DISTORTION COST
+	CalcDistortionScore();
 	
 	// LANGUAGE MODEL COST
-	CalcLMScore(lmListInitial, lmListEnd);
+	CalcLMScore(staticData.GetLanguageModel(Initial), staticData.GetLanguageModel(Other));
 
 	// WORD PENALTY
 	m_score[ScoreType::WordPenalty] = - (float) GetSize();
 
 	// FUTURE COST
 	CalcFutureScore(futureScore);
+	
+	//LEXICAL REORDERING COST (NOTE(review): the LexicalReordering term summed before this change is no longer part of the total below -- confirm intended)
+	CalcLexicalReorderingScore();
 
-
-  //LEXICAL REORDERING COST
-  CalcLexicalReorderingScore();
+	//cost for deleting source words, computed only when m_wordDeleted is set
+	if (m_wordDeleted)
+	{
+		CalcDeletionScore(source, GetCurrSourceWordsRange(), staticData.GetWordDeletionTable());
+	}
 
 	// TOTAL COST
 	m_score[ScoreType::Total] = m_score[ScoreType::PhraseTrans]
 								+ m_score[ScoreType::Generation]			
 								+ m_score[ScoreType::LanguageModelScore]
-								+ m_score[ScoreType::Distortion]					* weightDistortion
-								+ m_score[ScoreType::WordPenalty]				* weightWordPenalty
-								+ m_score[ScoreType::FutureScoreEnum]
-								+ m_score[ScoreType::LexicalReordering];
+								+ m_score[ScoreType::Distortion]					* staticData.GetWeightDistortion()
+								+ m_score[ScoreType::WordPenalty]					* staticData.GetWeightWordPenalty()
+								+ m_score[ScoreType::DeletedWords]				* staticData.GetWordDeletionWeight()
+								+ m_score[ScoreType::FutureScoreEnum];
 }
 
 void Hypothesis::CalcFutureScore(const SquareMatrix &futureScore)
@@ -498,7 +544,7 @@ const Hypothesis* Hypothesis::GetPrevHypo()const{
 }
 
 /**
- * prints hypothesis information for pharaoh style logging
+ * print hypothesis information for pharaoh-style logging
  */
 void Hypothesis::PrintHypothesis(const Sentence &source, float weightDistortion, float weightWordPenalty) const{
 	int start = m_prevHypo->m_currSourceWordsRange.GetEndPos() -1;
@@ -519,10 +565,10 @@ void Hypothesis::PrintHypothesis(const Sentence &source, float weightDistortion,
 	cout<<" )"<<endl;
 	cout<<"\tbase score "<<m_prevHypo->m_score[ScoreType::Total]<<endl;
 	cout<<"\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "<< source.GetSubString(m_currSourceWordsRange)  <<endl;
-	cout<<"\ttranslated as: "<<m_phrase<<" => translation cost "<<m_score[ScoreType::PhraseTrans]<<endl;
+	cout<<"\ttranslated as: "<<m_targetPhrase<<" => translation cost "<<m_score[ScoreType::PhraseTrans]<<endl;
 	cout<<"\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange()) << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
 	cout<<"\tlanguage model cost "<<m_score[ScoreType::LanguageModelScore]<<endl;
-	cout<<"\tword penalty "<<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
+	cout<<"\tword penalty "<<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<< "\tdeletion cost "<<m_score[ScoreType::DeletedWords] << endl;
 	cout<<"\tscore "<<m_score[ScoreType::Total] - m_score[ScoreType::FutureScoreEnum]<<" + future cost "<<m_score[ScoreType::FutureScoreEnum]<<" = "<<m_score[ScoreType::Total]<<endl;
 	//PrintLMScores();
 }
@@ -533,23 +579,19 @@ void Hypothesis::PrintHypothesis(const Sentence &source, float weightDistortion,
 ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
 {	
 	hypothesis.ToStream(out);
-	
 	// words bitmap
-		
 	out << "[" << hypothesis.m_sourceCompleted << "] ";
 	
-	
-		out << " [" << hypothesis.GetScore( static_cast<ScoreType::ScoreType>(0));
-		for (size_t i = 1 ; i < NUM_SCORES ; i++)
-			{
-				out << "," << hypothesis.GetScore( static_cast<ScoreType::ScoreType>(i));
-			}
-		out << "]";
+	// scores
+	out << " [" << hypothesis.GetScore( static_cast<ScoreType::ScoreType>(0));
+	for (size_t i = 1 ; i < NUM_SCORES ; i++)
+	{
+		out << "," << hypothesis.GetScore( static_cast<ScoreType::ScoreType>(i));
+	}
+	out << "]";
 #ifdef N_BEST
-		out << " " << hypothesis.GetScoreComponent();
-		out << " " << hypothesis.GetGenerationScoreComponent();
+	out << " " << hypothesis.GetScoreComponent();
+	out << " " << hypothesis.GetGenerationScoreComponent();
 #endif
-
-	
 	return out;
 }
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index b7082e9df..21dcaa777 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -36,7 +36,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "ScoreComponentCollection.h"
 
 class SquareMatrix;
+class StaticData;
 class TranslationOption;
+class Sentence;
+class WordsRange;
+class WordDeletionTable;
 
 class Hypothesis : public LatticeEdge
 {
@@ -47,65 +51,82 @@ protected:
 		//				of those in dictionary
 	WordsBitmap				m_sourceCompleted;
 	WordsRange				m_currSourceWordsRange, m_currTargetWordsRange;
+  bool							m_wordDeleted;
 #ifdef N_BEST
 	std::list<Arc*>		m_arcList; //all arcs that end at the same lattice point as we do
 #endif
 
 	/***
+	 * Used for initializing translation process
+	 */
+	Hypothesis(const Phrase &phrase, const WordsBitmap &initialCoverage);
+	// create next
+	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
+
+	/***
 	 * \return whether none of the factors clash
+	 * \param phrase TODO ???
 	 */
 	bool IsCompatible(const Phrase &phrase) const;
 	
 	void CalcFutureScore(const SquareMatrix &futureScore);
 	//void CalcFutureScore(float futureScore[256][256]);
 	void CalcLMScore(const LMList		&lmListInitial, const LMList	&lmListEnd);
+	void CalcDistortionScore();
 	//TODO: add appropriate arguments to score calculator
-  void CalcLexicalReorderingScore();
+	void CalcLexicalReorderingScore();
+  void CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable);
 
 public:
 
-	static int s_numNodes;
-	int m_id;	
-
+	static int s_numNodes; //counter that each Hypothesis constructor reads and post-increments to assign m_id
+	int m_id;
+	
 	/***
 	 * Deep copy
 	 */
-	Hypothesis(const Hypothesis &copy); 
+	Hypothesis(const Hypothesis &copy);
+	
+	/***
+	 * return the subclass of Hypothesis most appropriate to the given translation option
+	 */
+	static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt);
+	/***
+	 * return the subclass of Hypothesis most appropriate to the given target phrase
+	 */
+	static Hypothesis* Create(const Phrase& targetPhrase, const WordsBitmap &initialCoverage);
 
-	// used to create clone
-	Hypothesis(const Phrase &phrase, const WordsBitmap &initialCoverage);
-		// used for initial seeding of trans process
-	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
-		// create next
 	~Hypothesis();
-	inline Hypothesis *Clone() const
-	{
-		return new Hypothesis(*this);
-	}
-
-	Hypothesis *CreateNext(const TranslationOption &transOpt) const;
 	
-	Hypothesis *MergeNext(const TranslationOption &transOpt) const;
-
-	int GetId()const;
-	void PrintHypothesis(  const Sentence &source, float weightDistortion, float weightWordPenalty) const;
+	/***
+	 * return the subclass of Hypothesis most appropriate to the given translation option
+	 */
+	Hypothesis* CreateNext(const TranslationOption &transOpt) const;
+	/***
+	 * if any factors aren't set in our target phrase but are present in transOpt, copy them over
+	 * (unless the factors that we do have fail to match the corresponding ones in transOpt,
+	 *  in which case presumably there's a programmer's error)
+	 * 
+	 * return NULL if we aren't compatible with the given option
+	 */
+	Hypothesis* MergeNext(const TranslationOption &transOpt) const;
+	
+	virtual void PrintHypothesis(  const Sentence &source, float weightDistortion, float weightWordPenalty) const;
  // void PrintLMScores(const LMList &lmListInitial, const LMList	&lmListEnd) const;
 	inline const WordsRange &GetCurrSourceWordsRange() const
 	{
 		return m_currSourceWordsRange;
 	}
-	inline size_t GetCurrTargetLength() const
+	
+	// subsequent translation should only translate this sub-phrase
+	virtual size_t GetCurrTargetLength() const
 	{
 		return m_currTargetWordsRange.GetWordsCount();
 	}
-	// subsequent translation should only translate this sub-phrase
 
-	void CalcScore(const LMList &lmListInitial
-							, const LMList &lmListEnd
-							, float weightDistortion
-							, float weightWordPenalty
-							, const SquareMatrix &futureScore
-							, const Sentence &source) ;
+	virtual void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source);
+
+	int GetId() const;
 
 	const Hypothesis* GetPrevHypo() const;
 
@@ -116,34 +137,34 @@ public:
 	}
 	inline const Phrase &GetPhrase() const
 	{
-		return m_phrase;
+		return m_targetPhrase;
 	}
 
 	// curr
 	inline FactorArray &GetCurrFactorArray(size_t pos)
 	{
-		return m_phrase.GetFactorArray(pos);
+		return m_targetPhrase.GetFactorArray(pos);
 	}
 	inline const FactorArray &GetCurrFactorArray(size_t pos) const
 	{
-		return m_phrase.GetFactorArray(pos);
+		return m_targetPhrase.GetFactorArray(pos);
 	}
 	inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
 	{
-		return m_phrase.GetFactor(pos, factorType);
+		return m_targetPhrase.GetFactor(pos, factorType);
 	}
 	// recursive
 	inline const FactorArray &GetFactorArray(size_t pos) const
 	{
 		if (pos < m_currTargetWordsRange.GetStartPos())
 			return m_prevHypo->GetFactorArray(pos);
-		return m_phrase.GetFactorArray(pos - m_currTargetWordsRange.GetStartPos());
+		return m_targetPhrase.GetFactorArray(pos - m_currTargetWordsRange.GetStartPos());
 	}
-	inline const Factor *GetFactor(size_t pos, FactorType factorType) const
+	inline const Factor* GetFactor(size_t pos, FactorType factorType) const
 	{
 		if (pos < m_currTargetWordsRange.GetStartPos())
 			return m_prevHypo->GetFactor(pos, factorType);
-		return m_phrase.GetFactor(pos - m_currTargetWordsRange.GetStartPos(), factorType);
+		return m_targetPhrase.GetFactor(pos - m_currTargetWordsRange.GetStartPos(), factorType);
 	}
 
 	/***
diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp
index d2f0e7647..4f393a877 100755
--- a/moses/src/LanguageModel.cpp
+++ b/moses/src/LanguageModel.cpp
@@ -40,6 +40,9 @@ const LmId LanguageModel::UNKNOWN_LM_ID(0);
 LanguageModel::LanguageModel() {}
 LanguageModel::~LanguageModel() {}
 
+/***
+ * ngramComponent should be an invalid pointer iff n-best ranking is turned off (NOTE(review): this signature has no parameter named ngramComponent -- confirm or remove)
+ */
 void LanguageModel::CalcScore(const Phrase &phrase
 														, float &fullScore
 														, float &ngramScore) const
@@ -79,4 +82,3 @@ void LanguageModel::CalcScore(const Phrase &phrase
 	}
 	fullScore += ngramScore;
 }
-
diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h
index 39eabef45..5db15c977 100755
--- a/moses/src/LanguageModel.h
+++ b/moses/src/LanguageModel.h
@@ -63,6 +63,9 @@ public:
 	{
 		return m_sentenceEnd;
 	}
+	/***
+	 * ngramComponent should be an invalid pointer iff n-best ranking is turned off (NOTE(review): this signature has no parameter named ngramComponent -- confirm or remove)
+	 */
 	void CalcScore(const Phrase &phrase
 							, float &fullScore
 							, float &ngramScore) const;
diff --git a/moses/src/LatticeEdge.cpp b/moses/src/LatticeEdge.cpp
index 9484ca3b6..b2f796b5e 100755
--- a/moses/src/LatticeEdge.cpp
+++ b/moses/src/LatticeEdge.cpp
@@ -19,6 +19,8 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 
+#include <cstring> // memset
+
 #include "LatticeEdge.h"
 #include "LanguageModel.h"
 
@@ -30,10 +32,7 @@ LatticeEdge::~LatticeEdge()
 
 void LatticeEdge::ResetScore()
 {
-	for (size_t i = 0 ; i < NUM_SCORES ; i++)
-	{
-		m_score[i]	= 0;
-	}
+  std::memset(m_score, 0, sizeof(float) * NUM_SCORES); // zero all NUM_SCORES entries; assumes an all-zero-bytes float equals 0
 }
 
 #ifdef N_BEST
diff --git a/moses/src/LatticeEdge.h b/moses/src/LatticeEdge.h
index 31026031b..d254d6f00 100755
--- a/moses/src/LatticeEdge.h
+++ b/moses/src/LatticeEdge.h
@@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 #pragma once
 
+#include <cstring> //memcpy()
 #include <iostream>
 #include <list>
 #include "TypeDef.h"
@@ -43,8 +44,8 @@ protected:
 	// scores
 	float						m_score[NUM_SCORES];
 
-	const Hypothesis *m_prevHypo;
-	Phrase					m_phrase; //target phrase being created at the current decoding step
+	const Hypothesis* m_prevHypo;
+	Phrase					m_targetPhrase; //target phrase being created at the current decoding step
 
 #ifdef N_BEST
 	ScoreComponentCollection	m_transScoreComponent;
@@ -54,14 +55,14 @@ protected:
 
 public:
 	LatticeEdge(const LatticeEdge &copy); // not implemented
-	LatticeEdge(const float 												score[NUM_SCORES]
+	LatticeEdge(const float 												score[]
 						, const ScoreComponentCollection 	&transScoreComponent
 						, const ScoreColl					 						&lmScoreComponent
 						, const ScoreColl											&generationScoreComponent
 						, const Phrase 												&phrase
 						, const Hypothesis 										*prevHypo)
 		:m_prevHypo(prevHypo)
-		,m_phrase(phrase)
+		,m_targetPhrase(phrase)
 #ifdef N_BEST
 		,m_transScoreComponent(transScoreComponent)
 		,m_generationScoreComponent(generationScoreComponent)
@@ -72,25 +73,25 @@ public:
 	}
 	LatticeEdge(FactorDirection direction, const Hypothesis *prevHypo)
 		:m_prevHypo(prevHypo)
-		,m_phrase(direction)
+		,m_targetPhrase(direction)
 	{}
 
 	virtual ~LatticeEdge();
 
-	inline const Phrase &GetPhrase() const
+	inline const Phrase &GetTargetPhrase() const
 	{
-		return m_phrase;
+		return m_targetPhrase;
 	}
 	inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor)
 	{ // pos starts from current phrase, not from beginning of 1st phrase
-		m_phrase.SetFactor(pos, factorType, factor);
+		m_targetPhrase.SetFactor(pos, factorType, factor);
 	}
-	inline void SetScore(const float score[NUM_SCORES])
+	/***
+	 * score should be of length NUM_SCORES
+	 */
+	inline void SetScore(const float score[])
 	{
-		for (size_t currScore = 0 ; currScore < NUM_SCORES ; currScore++)
-		{
-			m_score[currScore] = score[currScore];
-		}
+		std::memcpy(m_score, score, NUM_SCORES * sizeof(float));
 	}
 	void ResetScore();
 
@@ -130,7 +131,7 @@ public:
 
 inline std::ostream& operator<<(std::ostream& out, const LatticeEdge& edge)
 {
-	out << edge.GetPhrase();
+	out << edge.GetTargetPhrase();
 	return out;
 }
 
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 68380260d..acb560f65 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -2,18 +2,19 @@ lib_LIBRARIES = libmoses.a
 libmoses_a_SOURCES = \
 	Arc.cpp \
 	ConfusionNet.cpp \
-	FactorCollection.cpp \
+	DeletionHypothesis.cpp \
 	Factor.cpp \
+	FactorCollection.cpp \
 	FactorTypeSet.cpp \
 	GenerationDictionary.cpp \
+	Hypothesis.cpp \
 	HypothesisCollection.cpp \
 	HypothesisCollectionIntermediate.cpp \
-	Hypothesis.cpp \
 	InputFileStream.cpp \
 	LanguageModel.cpp \
 	LatticeEdge.cpp \
 	LatticePath.cpp \
-  LexicalReordering.cpp \
+	LexicalReordering.cpp \
 	Manager.cpp \
 	md5.cpp \
 	Parameter.cpp \
@@ -26,11 +27,13 @@ libmoses_a_SOURCES = \
 	TargetPhrase.cpp \
 	TranslationOption.cpp \
 	TranslationOptionCollection.cpp \
-	UserMessage.cpp \
-	Util.cpp \
-	WeightOptimization.cpp \
-	Word.cpp \
-	WordsBitmap.cpp \
+	UnknownWordHandler.cpp \
+ 	UserMessage.cpp \
+ 	Util.cpp \
+ 	WeightOptimization.cpp \
+ 	Word.cpp \
+ 	WordDeletionTable.cpp \
+ 	WordsBitmap.cpp \
 	WordsRange.cpp
 
 if INTERNAL_LM
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index a72337140..827e4b806 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -77,7 +77,7 @@ void Manager::ProcessSentence()
 
 	// seed hypothesis
 	{
-	Hypothesis *hypo = new Hypothesis(m_source, m_possibleTranslations.GetInitialCoverage());
+	Hypothesis *hypo = Hypothesis::Create(m_source, m_possibleTranslations.GetInitialCoverage());
 	TRACE_ERR(m_possibleTranslations.GetInitialCoverage().GetWordsCount() << endl);
 #ifdef N_BEST
 	LMList allLM = m_staticData.GetAllLM();
@@ -176,11 +176,7 @@ void Manager::ProcessOneHypothesis(const list < DecodeStep > &decodeStepList, co
 	{
 		Hypothesis *hypo = *iterHypo;
 
-		hypo->CalcScore(m_staticData.GetLanguageModel(Initial)
-									, m_staticData.GetLanguageModel(Other)
-									, m_staticData.GetWeightDistortion()
-									, m_staticData.GetWeightWordPenalty()
-									, m_possibleTranslations.GetFutureScore(), m_source);
+		hypo->CalcScore(m_staticData, m_possibleTranslations.GetFutureScore(), m_source);
 		if(m_staticData.GetVerboseLevel() > 2) 
 		{			
 			hypo->PrintHypothesis(m_source, m_staticData.GetWeightDistortion(), m_staticData.GetWeightWordPenalty());
@@ -207,7 +203,6 @@ void Manager::ProcessOneHypothesis(const list < DecodeStep > &decodeStepList, co
 			++iterHypo;
 		}
 	}
-
 }
 
 void Manager::ProcessInitialTranslation(const Hypothesis &hypothesis, const DecodeStep &decodeStep, HypothesisCollectionIntermediate &outputHypoColl)
@@ -222,7 +217,7 @@ void Manager::ProcessInitialTranslation(const Hypothesis &hypothesis, const Deco
 
 			if ( !transOpt.Overlap(hypothesis)) 
 			{
-				Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
+				Hypothesis* newHypo = hypothesis.CreateNext(transOpt);
 				outputHypoColl.AddNoPrune( newHypo );			
 			}
 		}
@@ -276,9 +271,6 @@ void Manager::ProcessInitialTranslation(const Hypothesis &hypothesis, const Deco
 			}
 		}
 	}
-
-
-
 }
 
 void Manager::ProcessTranslation(const Hypothesis &hypothesis, const DecodeStep &decodeStep, HypothesisCollectionIntermediate &outputHypoColl)
@@ -398,8 +390,8 @@ void Manager::CreateTranslationOptions(const Phrase &phrase, PhraseDictionary &p
 				/*
 				 * changed to have an extendable unknown-word translation module -- EVH
 				 */
-				//std::list<TranslationOption> unknownWordTranslations = m_staticData.GetUnknownWordHandler().GetPossibleTranslations(wordsRange, sourcePhrase, m_staticData, phraseDictionary);
-				//m_possibleTranslations.insert(m_possibleTranslations.end(), unknownWordTranslations.begin(), unknownWordTranslations.end());
+//				boost::shared_ptr<std::list<TranslationOption> > unknownWordTranslations = m_staticData.GetUnknownWordHandler()->GetPossibleTranslations(wordsRange, sourcePhrase, m_staticData, phraseDictionary);
+//				m_possibleTranslations.insert(m_possibleTranslations.end(), unknownWordTranslations->begin(), unknownWordTranslations->end());
 			}
 		}
 	}
@@ -443,12 +435,10 @@ void Manager::CreateTranslationOptions(const Phrase &phrase, PhraseDictionary &p
 
 			//print information about future cost table when verbose option is set
 
-
 			if(m_staticData.GetVerboseLevel() > 2) 
-				{		
-					cout<<"future cost from "<<start<<" to "<<end<<" is "<<score[length]<<endl;
-				}
-
+			{		
+				cout<<"future cost from "<<start<<" to "<<end<<" is "<<score[length]<<endl;
+			}
 		}
 	}
 }
@@ -542,7 +532,7 @@ void Manager::ProcessGeneration(const Hypothesis &hypothesis
 		}
 
 		// merge with existing hypothesis
-		Hypothesis *mergeHypo = hypothesis.Clone();
+		Hypothesis *mergeHypo = new Hypothesis(hypothesis);
 		mergeHypo->MergeFactors(mergeWords, generationDictionary, generationScore, weight);
 		outputHypoColl.AddNoPrune(mergeHypo);
 
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index b5b856579..f12ed72cc 100755
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -48,6 +48,7 @@ Parameter::Parameter()
 	AddParam("weight-l");
 	AddParam("weight-t");
 	AddParam("weight-w");
+	AddParam("weight-e"); //source word deletion overall weight
 	AddParam("weight-generation");
 	AddParam("mapping");
 	AddParam("n-best-list");
@@ -224,6 +225,7 @@ bool Parameter::LoadParam(int argc, char* argv[])
 	OverwriteParam("-lm", "weight-l", argc, argv);
 	OverwriteParam("-tm", "weight-t", argc, argv);
 	OverwriteParam("-w", "weight-w", argc, argv);
+	OverwriteParam("-e", "weight-e", argc, argv);
 	OverwriteParam("-g", "weight-generation", argc, argv);
 	OverwriteParam("-n-best-list", "n-best-list", argc, argv);
 	OverwriteParam("-s", "stack", argc, argv);
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index d3e2e1840..2afcce5b7 100755
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -66,22 +66,32 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
 	string line, prevSourcePhrase = "";
 	bool addPhrase = !filter;
 	size_t count = 0;
+  size_t line_num = 0;
 	while(getline(inFile, line)) 
 	{
-		vector<string> token = TokenizeMultiCharSeparator( line , "|||" );
-		
+    ++line_num;
+		vector<string> tokens = TokenizeMultiCharSeparator( line , "|||" );
+		if (tokens.size() != 3)
+		{
+			TRACE_ERR("Syntax error at " << filePath << ":" << line_num);
+			abort(); // TODO- error handling
+		}
+    if (tokens[1].find_first_not_of(" \t", 0) == string::npos) {
+      TRACE_ERR(filePath << ":" << line_num << ": phrase contains empty target, skipping\n");
+			continue;
+    }
 		if (!filter)
 		{
-			if (token[0] != prevSourcePhrase)
-				phraseVector = Phrase::Parse(token[0]);
+			if (tokens[0] != prevSourcePhrase)
+				phraseVector = Phrase::Parse(tokens[0]);
 		}
-		else if (token[0] == prevSourcePhrase)
+		else if (tokens[0] == prevSourcePhrase)
 		{ // same source phrase as prev line.
 		}
 		else
 		{
-			phraseVector = Phrase::Parse(token[0]);
-			prevSourcePhrase = token[0];
+			phraseVector = Phrase::Parse(tokens[0]);
+			prevSourcePhrase = tokens[0];
 
 			if (Contains(phraseVector, inputPhraseList, input))
 				addPhrase = true;
@@ -91,7 +101,7 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
 
 		if (addPhrase)
 		{
-			vector<float> scoreVector = Tokenize<float>(token[2]);
+			vector<float> scoreVector = Tokenize<float>(tokens[2]);
 			assert(scoreVector.size() == m_noScoreComponent);
 			
 			// source
@@ -99,7 +109,7 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
 			sourcePhrase.CreateFromString( input, phraseVector, factorCollection);
 			//target
 			TargetPhrase targetPhrase(Output, this);
-			targetPhrase.CreateFromString( output, token[1], factorCollection);
+			targetPhrase.CreateFromString( output, tokens[1], factorCollection);
 
 			// component score, for n-best output
 			targetPhrase.SetScore(scoreVector, weight, languageModels, weightWP);
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index f0edc1213..91e02f4c5 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -91,9 +91,17 @@ bool StaticData::LoadParameters(int argc, char* argv[])
 	//input-factors
 	const vector<string> &inputFactorVector = m_parameter.GetParam("input-factors");
 	for(size_t i=0; i<inputFactorVector.size(); i++) 
-  {
+	{
 		m_inputFactorOrder.push_back(Scan<FactorType>(inputFactorVector[i]));
 	}
+	
+	//source word deletion: switched on by "dtable-file", which also requires the weight "weight-e"
+	m_wordDeletionEnabled = m_parameter.GetParam("dtable-file").size() > 0;
+	m_wordDeletionWeight = 0; //give the weight a defined value even when deletion is off
+	if (m_wordDeletionEnabled && m_parameter.GetParam("weight-e").size() == 0)
+	{ std::cerr << "Word deletion requires weight-e to be set." << std::endl; return false; } //[0] below would index an empty vector
+	if (m_wordDeletionEnabled) { m_wordDeletionWeight = Scan<float>(m_parameter.GetParam("weight-e")[0]); }
+	if (m_wordDeletionEnabled && GetVerboseLevel() > 0) { std::cerr << "Word deletion enabled." << std::endl; }
 
 	// load Lexical Reordering model
 	// check to see if the lexical reordering parameter exists
@@ -281,6 +289,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
 		: TransformScore(DEFAULT_BEAM_THRESHOLD);
 
 	// Unknown Word Processing -- wade
+	//TODO replace this w/general word dropping -- EVH
 	if (m_parameter.GetParam("drop-unknown").size() == 1)
 	  { m_dropUnknown = Scan<size_t>( m_parameter.GetParam("drop-unknown")[0]); }
 	else
@@ -413,6 +422,7 @@ void StaticData::LoadPhraseTables(bool filter
 		for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) 
 		{
 			vector<string>			token		= Tokenize(translationVector[currDict]);
+			//characteristics of the phrase table
 			vector<FactorType> 	input		= Tokenize<FactorType>(token[0], ",")
 													,output	= Tokenize<FactorType>(token[1], ",");
 			string							filePath= token[3];
@@ -467,7 +477,13 @@ void StaticData::LoadPhraseTables(bool filter
 			timer.check("Finished loading PhraseTable");
 		}
 	}
-
+/*
+	//load word deletion table
+	if(m_parameter.GetParam("dtable-file").size() > 0)
+	{
+		m_wordDeletionTable.Load(m_parameter.GetParam("dtable-file")[0], *this);
+	}
+*/
 	timer.check("Finished loading phrase tables");
 }
 
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 4e2a32ec1..0ab55b46a 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <list>
 #include <vector>
+#include <boost/shared_ptr.hpp>
 #include "TypeDef.h"
 #include "PhraseDictionary.h"
 #include "GenerationDictionary.h"
@@ -33,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "InputOutput.h"
 #include "DecodeStep.h"
 //#include "UnknownWordHandler.h"
+#include "WordDeletionTable.h"
 
 class StaticData
 {
@@ -40,16 +42,17 @@ protected:
 	FactorCollection										m_factorCollection;
 	std::vector<PhraseDictionary*>			m_phraseDictionary;
 	std::vector<GenerationDictionary*>	m_generationDictionary;
+	WordDeletionTable m_wordDeletionTable;
 	std::list < DecodeStep >						m_decodeStepList;
-	Parameter														m_parameter;
-	std::vector<FactorType>							m_inputFactorOrder;
-	std::vector<LMList>									m_languageModel;
+	Parameter			m_parameter;
+	std::vector<FactorType>			m_inputFactorOrder;
+//	boost::shared_ptr<UnknownWordHandler>      m_unknownWordHandler; //defaults to NULL; pointer allows polymorphism
+	std::vector<LMList>			m_languageModel;
 	LexicalReordering                   *m_lexReorder;
-//	UnknownWordHandler						      m_unknownWordHandler; //defaults to NULL; pointer allows polymorphism
 		// Initial	= 0 = can be used when creating poss trans
 		// Other		= 1 = used to calculate LM score once all steps have been processed
 	float																m_beamThreshold
-																			,m_weightDistortion, m_weightWordPenalty;
+																			,m_weightDistortion, m_weightWordPenalty, m_wordDeletionWeight;
 									// PhraseTrans, Generation & LanguageModelScore has multiple weights.
 	int																	m_maxDistortion;
 									// do it differently from old pharaoh
@@ -61,16 +64,28 @@ protected:
 	std::vector<std::string>						m_mySQLParam;
 	InputOutput													*m_inputOutput;
 	bool                                m_fLMsLoaded;
-	int m_dropUnknown;
-	
+	/***
+	 * false = treat unknown words as proper nouns, and translate them as themselves;
+	 * true = drop (ignore) them
+	 */
+	bool m_dropUnknown;
+	bool m_wordDeletionEnabled;
+		
 	size_t m_verboseLevel;
 
 public:
 	StaticData();
 	~StaticData();
 
+	/***
+	 * also initialize the Parameter object
+	 */
 	bool LoadParameters(int argc, char* argv[]);
 
+	/***
+	 * load not only the main phrase table but also any auxiliary tables that depend on which features are being used
+	 * (eg word-deletion, word-insertion tables)
+	 */
 	void LoadPhraseTables(bool filter
 											, const std::string &inputFileHash
 											, const std::list< Phrase > &inputPhraseList);
@@ -79,7 +94,7 @@ public:
 		LoadPhraseTables(false, "", std::list< Phrase >());
 	}
 	void LoadMapping();
-/*	void SetUnknownWordHandler(UnknownWordHandler &unknownWordHandler)
+/*	void SetUnknownWordHandler(boost::shared_ptr<UnknownWordHandler> unknownWordHandler)
 	{
 		m_unknownWordHandler = unknownWordHandler;
 	}
@@ -103,17 +118,17 @@ public:
 	{
 		return m_decodeStepList;
 	}
-
-  inline int GetDropUnknown() const 
-  { 
-  	return m_dropUnknown; 
-  }
+	
+	inline bool GetDropUnknown() const 
+	{ 
+		return m_dropUnknown; 
+	}
 /*	
-	UnknownWordHandler &GetUnknownWordHandler()
+	boost::shared_ptr<UnknownWordHandler> GetUnknownWordHandler()
 	{
 		return m_unknownWordHandler;
 	}
-*/	
+*/
 	FactorCollection &GetFactorCollection()
 	{
 		return m_factorCollection;
@@ -131,6 +146,14 @@ public:
 	{
 		return m_weightWordPenalty;
 	}
+	float GetWordDeletionWeight() const
+	{
+		return m_wordDeletionWeight;
+	}
+	bool LittleChrisAsksWhetherWordDeletionIsEnabledAndWeAnswerHim() const
+	{
+		return m_wordDeletionEnabled;
+	}
 	size_t GetMaxHypoStackSize() const
 	{
 		return m_maxHypoStackSize;
@@ -168,6 +191,13 @@ public:
 	{
 		return m_cachePath;
 	}
+	/***
+	 * only call this if word deletion is enabled
+	 */
+	const WordDeletionTable& GetWordDeletionTable() const
+	{
+		return m_wordDeletionTable;
+	}
 
 	size_t GetVerboseLevel() const
 	{
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
index 7a5e22476..bdd54c827 100644
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@@ -34,11 +34,12 @@ TargetPhrase::TargetPhrase(FactorDirection direction, const PhraseDictionary *ph
 {
 }
 
+// used when creating translations of unknown words:
 // TODO the two versions of SetScore have two problems:
 //  1) they are badly named- computePhraseScores would probably be better
 //  2) they duplicate way too much code between them
 void TargetPhrase::SetScore(const LMList &languageModels, float weightWP)
-{ // used when creating translations of unknown words:
+{
 	m_transScore = m_ngramScore = 0;	
 	m_fullScore = weightWP;
 	
@@ -54,16 +55,14 @@ void TargetPhrase::SetScore(const LMList &languageModels, float weightWP)
 	
 			float fullScore, nGramScore;
 	
-			#ifdef N_BEST
-					(*lmIter)->CalcScore(*this, fullScore, nGramScore);
-          size_t lmId = (*lmIter)->GetId();
-          pair<size_t, float> store(lmId, nGramScore);
-          m_ngramComponent.push_back(store);
-			#else
-			    // this is really, really ugly (a reference to an object at NULL
-			    // is asking for trouble). TODO
-					(*lmIter)->CalcScore(*this, fullScore, nGramScore);
-			#endif
+#ifdef N_BEST
+			(*lmIter)->CalcScore(*this, fullScore, nGramScore);
+			size_t lmId = (*lmIter)->GetId();
+			pair<size_t, float> store(lmId, nGramScore);
+			m_ngramComponent.push_back(store);
+#else
+			(*lmIter)->CalcScore(*this, fullScore, nGramScore);
+#endif
 	
 			m_fullScore   += fullScore * weightLM;
 			m_ngramScore	+= nGramScore * weightLM;
@@ -104,19 +103,16 @@ void TargetPhrase::SetScore(const vector<float> &scoreVector, const vector<float
 			float fullScore, nGramScore;
 #ifdef N_BEST
 			lm.CalcScore(*this, fullScore, nGramScore);
-      size_t lmId = lm.GetId();
-      pair<size_t, float> store(lmId, nGramScore);
-      m_ngramComponent.push_back(store);
+			size_t lmId = lm.GetId();
+			pair<size_t, float> store(lmId, nGramScore);
+			m_ngramComponent.push_back(store);
 #else
-	    // this is really, really ugly (a reference to an object at NULL
-	    // is asking for trouble). TODO
 			lm.CalcScore(*this, fullScore, nGramScore);
 #endif
 	
 			// total LM score so far
 			totalNgramScore  += nGramScore * weightLM;
 			totalFullScore   += fullScore * weightLM;
-			
 		}
 	}
   m_ngramScore = totalNgramScore;
diff --git a/moses/src/TargetPhrase.h b/moses/src/TargetPhrase.h
index 2c403c890..c9a19d9fb 100644
--- a/moses/src/TargetPhrase.h
+++ b/moses/src/TargetPhrase.h
@@ -29,30 +29,18 @@ class PhraseDictionary;
 
 class TargetPhrase: public Phrase
 {
-  friend std::ostream& operator<<(std::ostream&, const TargetPhrase&);
+	friend std::ostream& operator<<(std::ostream&, const TargetPhrase&);
 protected:
 	float m_transScore, m_ngramScore, m_fullScore;
 #ifdef N_BEST
 	ScoreComponent m_scoreComponent;
-	std::list< std::pair<size_t, float> > m_lmScoreComponent;
-	std::list< std::pair<size_t, float> > m_ngramComponent;
+	std::vector< std::pair<size_t, float> > m_lmScoreComponent;
+	std::vector< std::pair<size_t, float> > m_ngramComponent;
 #endif
 
 public:
 
 	TargetPhrase(FactorDirection direction, const PhraseDictionary *phraseDictionary);
-	
-	/***
-	 * Deep copy
-	 *
-	TargetPhrase(const TargetPhrase& phrase)
-	: Phrase(phrase.GetDirection()), m_transScore(phrase.m_transScore), m_ngramScore(phrase.m_ngramScore), m_fullScore(phrase.m_fullScore)
-#ifdef N_BEST
-	, m_scoreComponent(phrase.m_scoreComponent), m_lmScoreComponent(phrase.m_lmScoreComponent), m_ngramComponent(phrase.m_ngramComponent)
-#endif
-	{
-	}
-	*/
 
 	void SetScore(const std::vector<float> &scoreVector, const std::vector<float> &weightT,
 								const LMList &languageModels, float weightWP);
@@ -61,33 +49,39 @@ public:
 	void ResetScore();
 	void SetWeights(const std::vector<float> &weightT);
 
-  inline float GetTranslationScore() const
-  {
-    return m_transScore;
-  }
-  //TODO is this really the best name?
-  inline float GetFutureScore() const
-  {
-    return m_fullScore;
-  }
-  inline float GetNgramScore() const
-  {
-    return m_ngramScore;
-  }
+	inline float GetTranslationScore() const
+	{
+		return m_transScore;
+	}
+  /***
+   * return the estimated score resulting from our being added to a sentence
+   * (it's an estimate because we don't have full n-gram info for the language model
+   *  without using the (unknown) full sentence)
+   * 
+   * TODO is this really the best name?
+   */
+	inline float GetFutureScore() const
+	{
+		return m_fullScore;
+	}
+	inline float GetNgramScore() const
+	{
+		return m_ngramScore;
+	}
 
 #ifdef N_BEST
 	inline const ScoreComponent &GetScoreComponents() const
 	{
 		return m_scoreComponent;
 	}
-  inline const std::list< std::pair<size_t, float> > &GetLMScoreComponent() const
-  {
-    return m_lmScoreComponent;
-  }
-  inline const std::list< std::pair<size_t, float> > &GetNgramComponent() const
-  {
-    return m_ngramComponent;
-  }
+	inline const std::vector< std::pair<size_t, float> > &GetLMScoreComponent() const
+	{
+		return m_lmScoreComponent;
+	}
+	inline const std::vector< std::pair<size_t, float> > &GetNgramComponent() const
+	{
+		return m_ngramComponent;
+	}
 #endif
 
 };
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
index 03cfde5a4..57b675e65 100755
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@@ -30,7 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 /***
  * Specify source and target words for a possible translation. m_targetPhrase points to a phrase-table entry.
- * The source word range is zero-indexed, so it can't refer to an empty range.
+ * The source word range is zero-indexed, so it can't refer to an empty range. The target phrase may be empty.
  */
 class TranslationOption
 {
@@ -48,26 +48,48 @@ public:
 	TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase);
 
 	bool Overlap(const Hypothesis &hypothesis) const;
+	/***
+	 * return start index of source phrase
+	 */
 	inline size_t GetStartPos() const
 	{
 		return m_sourceWordsRange.GetStartPos();
 	}
+	/***
+	 * return end index of source phrase
+	 */
 	inline size_t GetEndPos() const
 	{
 		return m_sourceWordsRange.GetEndPos();
 	}
+	/***
+	 * return length of source phrase
+	 */
 	inline size_t GetSize() const
 	{
 		return m_sourceWordsRange.GetEndPos() - m_sourceWordsRange.GetStartPos() + 1;
 	}
+	/***
+	 * return source words range
+	 */
 	inline const WordsRange &GetWordsRange() const
 	{
 		return m_sourceWordsRange;
 	}
-	inline const Phrase 	&GetPhrase() const
+	/***
+	 * return target phrase
+	 */
+	inline const Phrase& GetPhrase() const
 	{
 		return m_targetPhrase;
 	}
+  /***
+   * returns true if the source phrase translates into nothing
+   */
+	inline bool IsDeletionOption() const
+  {
+    return m_targetPhrase.GetSize() == 0;
+  }
 	inline float GetTranslationScore() const
 	{
 		return m_targetPhrase.GetTranslationScore();
@@ -86,11 +108,11 @@ public:
 	{
 		return m_transScoreComponent;
 	}
-	inline const std::list< std::pair<size_t, float> > &GetLMScoreComponent() const
+	inline const std::vector< std::pair<size_t, float> > &GetLMScoreComponent() const
 	{
 		return m_targetPhrase.GetLMScoreComponent();
 	}
-	inline const std::list< std::pair<size_t, float> > &GetTrigramComponent() const
+	inline const std::vector< std::pair<size_t, float> > &GetTrigramComponent() const
 	{
 		return m_targetPhrase.GetNgramComponent();
 	}
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index ff7107109..4c62356fa 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -105,25 +105,27 @@ enum DecodeType
 namespace ScoreType {
 	enum ScoreType
 	{
-		PhraseTrans
-		,Generation
-		,LanguageModelScore
-		,Distortion
-		,WordPenalty
-		,FutureScoreEnum
-		,LexicalReordering
-		,Total
+		PhraseTrans = 0,
+		Generation,
+		LanguageModelScore,
+		Distortion,
+		WordPenalty,
+		DeletedWords, //source words dropped from translation
+		InsertedWords, //words inserted into target phrase independently of phrase translation
+		FutureScoreEnum,
+		LexicalReordering,
+		Total
 	};
-};
+}
 
 // count of above
-const size_t NUM_SCORES = 8;
+const size_t NUM_SCORES = 10;
 
 namespace LexReorderType
 {
 	enum LexReorderType
 		{
-			Monotone
+			Monotone //TODO what the jiggers do these symbols mean?
 			,Msd
 			,Forward
 			,Backward
diff --git a/moses/src/UnknownWordHandler.cpp b/moses/src/UnknownWordHandler.cpp
new file mode 100644
index 000000000..45fa03624
--- /dev/null
+++ b/moses/src/UnknownWordHandler.cpp
@@ -0,0 +1,73 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "StaticData.h"
+#include "TranslationOption.h"
+#include "UnknownWordHandler.h"
+
+/***
+ * default implementation: assume the source phrase is a proper noun and offer it as its own translation
+ * (only the word at index 0 of sourcePhrase is read); the new translation is also stored in phraseDictionary
+ */
+boost::shared_ptr<std::list<TranslationOption> > UnknownWordHandler::GetPossibleTranslations(
+	const WordsRange& sourceWordsRange, const Phrase& sourcePhrase, StaticData& staticData, PhraseDictionary& phraseDictionary) const
+{
+	TargetPhrase translation(Output, &phraseDictionary);
+	FactorArray &outWord = translation.AddWord();
+	const FactorArray &inWord = sourcePhrase.GetFactorArray(0);
+
+	//fill in every factor the phrase table uses on the output side
+	const FactorTypeSet &outputFactors = phraseDictionary.GetFactorsUsed(Output);
+	for (unsigned int factorIndex = 0 ; factorIndex < NUM_FACTORS ; ++factorIndex)
+	{
+		if (!outputFactors.Contains(factorIndex))
+			continue;
+
+		const FactorType factorType = static_cast<FactorType>(factorIndex);
+		const Factor *sourceFactor = inWord[factorType];
+		if (factorType == POS)
+		{ // the part of speech is not copied; it is set to UNKNOWN_FACTOR
+			outWord[factorType] = staticData.GetFactorCollection().AddFactor(Output, factorType, UNKNOWN_FACTOR);
+		}
+		else
+		{ // every other factor takes the string of the source word's factor
+			outWord[factorType] = staticData.GetFactorCollection().AddFactor(Output, factorType, sourceFactor->GetString());
+		}
+	}
+
+	LMList allLM = staticData.GetAllLM();
+	translation.SetScore(allLM, staticData.GetWeightWordPenalty());
+
+	/*
+	 * register the translation in the phrase table
+	 * (so that if we hit this source phrase again, we won't reprocess it because it won't still be unknown)
+	 */
+	phraseDictionary.AddEquivPhrase(sourcePhrase, translation);
+
+	//wrap each phrase-table entry for the source phrase in a TranslationOption
+	const TargetPhraseCollection *entries = phraseDictionary.FindEquivPhrase(sourcePhrase);
+	boost::shared_ptr<std::list<TranslationOption> > options(new std::list<TranslationOption>);
+	TargetPhraseCollection::const_iterator entry = entries->begin();
+	while (entry != entries->end())
+	{
+		options->push_back(TranslationOption(sourceWordsRange, *entry));
+		++entry;
+	}
+	return options;
+}
diff --git a/moses/src/UnknownWordHandler.h b/moses/src/UnknownWordHandler.h
new file mode 100644
index 000000000..23ce78dc5
--- /dev/null
+++ b/moses/src/UnknownWordHandler.h
@@ -0,0 +1,52 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <list>
+#include <boost/shared_ptr.hpp>
+#include "TargetPhrase.h"
+#include "WordsRange.h"
+#include "PhraseDictionary.h"
+
+class StaticData;
+class TranslationOption;
+
+/***
+ * Provide analysis of source-language words the phrase table can't help us with. This default implementation
+ * assumes all unknown words are proper names; it's meant to be inherited. The unknown-word handler used 
+ * is set in main().
+ */
+class UnknownWordHandler
+{
+	public:
+	
+		UnknownWordHandler() {}
+		virtual ~UnknownWordHandler() {}
+	
+		/***
+		 * \param sourceWordsRange A group of consecutive source words we can't translate via the phrase table
+		 * \param sourcePhrase The source words to be translated
+		 * \param staticData Source of the factor collection, language models and word-penalty weight (as used by the default implementation)
+		 * \param phraseDictionary A modifiable phrase table; the default implementation adds the new translation to it
+		 * \return A list of possible translations for the given source phrase
+		 */
+		virtual boost::shared_ptr<std::list<TranslationOption> > GetPossibleTranslations(
+			const WordsRange& sourceWordsRange, const Phrase& sourcePhrase, StaticData& staticData, PhraseDictionary& phraseDictionary) const;
+};
diff --git a/moses/src/WordDeletionTable.cpp b/moses/src/WordDeletionTable.cpp
new file mode 100644
index 000000000..201652d6f
--- /dev/null
+++ b/moses/src/WordDeletionTable.cpp
@@ -0,0 +1,70 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "TypeDef.h"
+#include "StaticData.h"
+#include "WordDeletionTable.h"
+using std::ifstream;
+using std::string;
+using std::vector;
+
+/***
+ * Fill m_deletionCosts from the word-deletion table stored in filename.
+ * Each line is of format PHRASE ||| DELETION_COST. The program exits if the file can't be
+ * opened or if a line has fewer than two |||-separated fields.
+ * \param filename path of the word-deletion table
+ * \param staticData provides the input factor order and the factor collection used to build each phrase
+ */
+void WordDeletionTable::Load(const string& filename, StaticData& staticData)
+{
+	std::cerr << "in WordDeletionTable::Load()" << std::endl; //trace goes to stderr, like the error messages below
+	ifstream infile(filename.c_str());
+	if(!infile)
+	{
+		std::cerr << "WordDeletionTable::Load(): can't open '" << filename << "' for read; exiting" << std::endl;
+		exit(-1);
+	}
+
+	const std::vector<FactorType>& input = staticData.GetInputFactorOrder();
+	string line;
+	size_t lineNum = 0;
+	while(getline(infile, line, '\n'))
+	{
+		++lineNum;
+		vector<string> token = TokenizeMultiCharSeparator(line, "|||");
+		if(token.size() < 2)
+		{
+			//reading token[1] below would otherwise run past the end of the vector
+			std::cerr << "WordDeletionTable::Load(): syntax error at " << filename << ":" << lineNum << "; exiting" << std::endl;
+			exit(-1);
+		}
+		//parse phrase
+		Phrase sourcePhrase(Input);
+		sourcePhrase.CreateFromString(input, token[0], staticData.GetFactorCollection());
+		//parse cost
+		const float cost = Scan<float>(token[1]);
+		m_deletionCosts[sourcePhrase] = cost;
+		std::cerr << "dtable entry: " << sourcePhrase << " -> " << cost << std::endl;
+	}
+}
diff --git a/moses/src/WordDeletionTable.h b/moses/src/WordDeletionTable.h
new file mode 100644
index 000000000..8fedf913c
--- /dev/null
+++ b/moses/src/WordDeletionTable.h
@@ -0,0 +1,55 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <stdexcept>
+#include "Phrase.h"
+
+class StaticData;
+
+/***
+ * Lookup table mapping each source-language phrase to the cost of deleting it.
+ * Filled by Load(), queried through GetDeletionCost().
+ */
+class WordDeletionTable
+{
+	public:
+	
+		//public so that callers and subclasses can name the type GetDeletionCost() returns
+		typedef float COST_TYPE;
+	
+	protected:
+	
+		std::map<Phrase, COST_TYPE> m_deletionCosts; //map each source-language phrase to the cost of deleting it
+		
+	public:
+	
+		/***
+		 * Fill the table from the given file.
+		 * should only be called once for a given instance
+		 */
+		void Load(const std::string& filename, StaticData& staticData);
+		
+		/***
+		 * Look up the cost of deleting the given source phrase.
+		 *
+		 * \return the cost stored for sourcePhrase
+		 * \throw std::invalid_argument if the given phrase isn't in our table
+		 *
+		 * Deliberately declared without a dynamic exception specification: throw(...) lists
+		 * were removed in C++17, and under older standards they route any unlisted
+		 * exception to std::unexpected() instead of letting it propagate.
+		 */
+		COST_TYPE GetDeletionCost(const Phrase& sourcePhrase) const
+		{
+			std::cout << "WordDeletionTable::GetDeletionCost()" << std::endl;
+			std::map<Phrase, COST_TYPE>::const_iterator i = m_deletionCosts.find(sourcePhrase);
+			if(i == m_deletionCosts.end())
+				throw std::invalid_argument("WordDeletionTable::GetDeletionCost()");
+			return i->second;
+		}
+};
diff --git a/moses/src/WordInsertionTable.h b/moses/src/WordInsertionTable.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/moses/src/WordInsertionTable.h
author	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-07-18 22:13:45 +0400
committer	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-07-18 22:13:45 +0400
commit	5c5d971895c177be55cf6b43acd3c827915f2bb4 (patch)
tree	7a8813b76e5921c381b64a2c8feabe2f72e5878d /moses
parent	7beabbd9ed39a52fcfe1d6dbd0567cf1ab1b4768 (diff)