- prepared confusion net input

-> new base class InputType is used throughout the decoder instead of Sentence;
   Sentence and ConfusionNet derive from this class
-> Manager etc. do not know if the input is a sentence or a confusion net (but could check if REALLY needed)
- two separate classes derived from TranslationOptionCollection: one for text input and one for confusion net input
- score computation in PhraseDictionaryTree.cpp and some optimizations

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@183 1f5c12ca-751b-0410-a591-d2e778427230
author: zens <zens@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-19 04:16:29 +0400
committer: zens <zens@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-19 04:16:29 +0400
commit: 648bd1dfcdc337f15ddf88823515e0cbedf62bd4 (patch)
tree: 893e3a23ce02ef0dd1f356498fd2bb04feaa2014 /moses
parent: 5449e11bb91459dc84bb07ff26ad7143b47df420 (diff)
19 files changed, 598 insertions, 382 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
index f3a9c8a66..1d8f734cc 100644
--- a/moses/src/ConfusionNet.cpp
+++ b/moses/src/ConfusionNet.cpp
@@ -5,7 +5,7 @@
 #include "FactorCollection.h"
 #include "Util.h"
 
-ConfusionNet::ConfusionNet(FactorCollection* p) : m_factorCollection(p) {}
+ConfusionNet::ConfusionNet(FactorCollection* p) : InputType(),m_factorCollection(p) {}
 
 void ConfusionNet::SetFactorCollection(FactorCollection *p) 
 {
@@ -31,7 +31,7 @@ void ConfusionNet::String2Word(const std::string& s,Word& w,const std::vector<Fa
 
 bool ConfusionNet::ReadFormat0(std::istream& in,const std::vector<FactorType>& factorOrder) {
 	assert(m_factorCollection);
-	clear();
+	Clear();
 	std::string line;
 	while(getline(in,line)) {
 		std::istringstream is(line);
@@ -55,7 +55,7 @@ bool ConfusionNet::ReadFormat0(std::istream& in,const std::vector<FactorType>& f
 }
 bool ConfusionNet::ReadFormat1(std::istream& in,const std::vector<FactorType>& factorOrder) {
 	assert(m_factorCollection);
-	clear();
+	Clear();
 	std::string line;
 	if(!getline(in,line)) return 0;
 	size_t s;
@@ -74,7 +74,6 @@ bool ConfusionNet::ReadFormat1(std::istream& in,const std::vector<FactorType>& f
 					std::cerr<<"WARN: neg costs: "<<data[i][j].second<<" -> set to 0\n";
 					data[i][j].second=0.0;}
 				String2Word(word,data[i][j].first,factorOrder);
-				//				data[i][j].first.SetFactor(Surface,m_factorCollection->AddFactor(Input,Surface,word));
 			} else return 0;
 	}
 	return !data.empty();
@@ -90,3 +89,12 @@ void ConfusionNet::Print(std::ostream& out) const {
 	}
 	out<<"\n\n";
 }
+
+Phrase ConfusionNet::GetSubString(const WordsRange&) const {
+	std::cerr<<"ERROR: call to ConfusionNet::GetSubString\n";
+	abort();
+	return Phrase();}
+const Factor* ConfusionNet::GetFactor(size_t pos, FactorType factorType) const {
+	std::cerr<<"ERROR: call to ConfusionNet::GetFactor\n";
+	abort();
+	return 0;}
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
index ec3ff0f65..7665892ff 100644
--- a/moses/src/ConfusionNet.h
+++ b/moses/src/ConfusionNet.h
@@ -4,9 +4,11 @@
 #include <vector>
 #include <iostream>
 #include "Word.h"
+#include "Input.h"
+
 class FactorCollection;
 
-class ConfusionNet {
+class ConfusionNet : public InputType {
  public: 
 	typedef std::vector<std::pair<Word,float> > Column;
 
@@ -21,13 +23,19 @@ class ConfusionNet {
 	const Column& GetColumn(size_t i) const {assert(i<data.size());return data[i];}
 	const Column& operator[](size_t i) const {return GetColumn(i);}
 
-	bool empty() const {return data.empty();}
-	size_t size() const {return data.size();}
-	void clear() {data.clear();}
+	bool Empty() const {return data.empty();}
+	size_t GetSize() const {return data.size();}
+	void Clear() {data.clear();}
 
 	bool Read(std::istream&,const std::vector<FactorType>& factorOrder,int format=0);
 	void Print(std::ostream&) const;
 
+
+	
+	Phrase GetSubString(const WordsRange&) const;
+	const Factor* GetFactor(size_t pos, FactorType factorType) const;
+
+
  private:
 	bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder);
 	bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder);
diff --git a/moses/src/Dictionary.h b/moses/src/Dictionary.h
index 1b77359cb..a41f6d349 100644
--- a/moses/src/Dictionary.h
+++ b/moses/src/Dictionary.h
@@ -22,8 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #pragma once
 
 #include <vector>
+#include "FactorTypeSet.h"
 
-class FactorTypeSet;
+//class FactorTypeSet;
 
 class Dictionary
 {
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index fd4327d92..66cb6ad14 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "Arc.h"
 #include "SquareMatrix.h"
 #include "StaticData.h"
+#include "Input.h"
 //TODO: add this include in when it compiles
 //#include "LexicalReordering.h"
 
@@ -37,7 +38,7 @@ using namespace std;
 
 int Hypothesis::s_numNodes = 0;
 
-Hypothesis::Hypothesis(const Phrase &phrase, const WordsBitmap &initialCoverage)
+Hypothesis::Hypothesis(const WordsBitmap &initialCoverage)
 	: LatticeEdge(Output, NULL)
 	, m_sourceCompleted(initialCoverage)
 	, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
@@ -159,10 +160,10 @@ Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOpti
 /***
  * return the subclass of Hypothesis most appropriate to the given target phrase
  */
-Hypothesis* Hypothesis::Create(const Phrase& targetPhrase, const WordsBitmap &initialCoverage)
+Hypothesis* Hypothesis::Create(const WordsBitmap &initialCoverage)
 {
 	/*if(s_wordDeletionEnabled && targetPhrase.GetSize() == 0) return new DeletionHypothesis(initialCoverage);
-	else*/ return new Hypothesis(targetPhrase, initialCoverage);
+	else*/ return new Hypothesis(initialCoverage);
 }
 
 /***
@@ -462,7 +463,7 @@ void Hypothesis::CalcDistortionScore()
 /***
  * calculate the logarithm of our total translation score (sum up components)
  */
-void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source) 
+void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore) 
 {
 	// DISTORTION COST
 	CalcDistortionScore();
@@ -528,7 +529,7 @@ const Hypothesis* Hypothesis::GetPrevHypo()const{
 /**
  * print hypothesis information for pharaoh-style logging
  */
-void Hypothesis::PrintHypothesis(const Sentence &source, float weightDistortion, float weightWordPenalty) const{
+void Hypothesis::PrintHypothesis(const InputType &source, float weightDistortion, float weightWordPenalty) const{
 	int start = m_prevHypo->m_currSourceWordsRange.GetEndPos() -1;
 	int end = m_prevHypo->m_currSourceWordsRange.GetEndPos();
 	cout<<"creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ... ";
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 21dcaa777..1b943a26d 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -38,7 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 class SquareMatrix;
 class StaticData;
 class TranslationOption;
-class Sentence;
+class InputType;
 class WordsRange;
 class WordDeletionTable;
 
@@ -59,7 +59,7 @@ protected:
 	/***
 	 * Used for initializing translation process
 	 */
-	Hypothesis(const Phrase &phrase, const WordsBitmap &initialCoverage);
+	Hypothesis(const WordsBitmap &initialCoverage);
 	// create next
 	Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
 
@@ -94,7 +94,7 @@ public:
 	/***
 	 * return the subclass of Hypothesis most appropriate to the given target phrase
 	 */
-	static Hypothesis* Create(const Phrase& targetPhrase, const WordsBitmap &initialCoverage);
+	static Hypothesis* Create(const WordsBitmap &initialCoverage);
 
 	~Hypothesis();
 	
@@ -111,7 +111,7 @@ public:
 	 */
 	Hypothesis* MergeNext(const TranslationOption &transOpt) const;
 	
-	virtual void PrintHypothesis(  const Sentence &source, float weightDistortion, float weightWordPenalty) const;
+	virtual void PrintHypothesis(  const InputType &source, float weightDistortion, float weightWordPenalty) const;
  // void PrintLMScores(const LMList &lmListInitial, const LMList	&lmListEnd) const;
 	inline const WordsRange &GetCurrSourceWordsRange() const
 	{
@@ -124,7 +124,7 @@ public:
 		return m_currTargetWordsRange.GetWordsCount();
 	}
 
-	virtual void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source);
+	virtual void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore);
 
 	int GetId() const;
 
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 94f4e189c..c8a319b54 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -2,6 +2,7 @@ lib_LIBRARIES = libmoses.a
 libmoses_a_SOURCES = \
 	Arc.cpp \
 	ConfusionNet.cpp \
+	CreateTargetPhraseCollection.cpp \
 	Factor.cpp \
 	FactorCollection.cpp \
 	FactorTypeSet.cpp \
@@ -9,6 +10,7 @@ libmoses_a_SOURCES = \
 	Hypothesis.cpp \
 	HypothesisCollection.cpp \
 	HypothesisCollectionIntermediate.cpp \
+	Input.cpp \
 	InputFileStream.cpp \
 	LanguageModel.cpp \
 	LatticeEdge.cpp \
@@ -26,12 +28,13 @@ libmoses_a_SOURCES = \
 	TargetPhrase.cpp \
 	TranslationOption.cpp \
 	TranslationOptionCollection.cpp \
-	UnknownWordHandler.cpp \
- 	UserMessage.cpp \
- 	Util.cpp \
- 	WeightOptimization.cpp \
- 	Word.cpp \
- 	WordsBitmap.cpp \
+	TranslationOptionCollectionText.cpp \
+	TranslationOptionCollectionConfusionNet.cpp \
+	UserMessage.cpp \
+	Util.cpp \
+	WeightOptimization.cpp \
+	Word.cpp \
+	WordsBitmap.cpp \
 	WordsRange.cpp
 
 if INTERNAL_LM
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index dc0e55307..907969362 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -32,11 +32,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 using namespace std;
 
-Manager::Manager(const Sentence &sentence, StaticData &staticData)
-:m_source(sentence)
-,m_hypoStack(sentence.GetSize() + 1)
+Manager::Manager(InputType const& source, 
+								 TranslationOptionCollection& toc,
+								 StaticData &staticData)
+:m_source(source)
+,m_hypoStack(source.GetSize() + 1)
 ,m_staticData(staticData)
-,m_possibleTranslations(sentence)
+,m_possibleTranslations(toc)  //dynamic_cast<Sentence const&>(source))
 {
 	std::vector < HypothesisCollection >::iterator iterStack;
 	for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
@@ -77,13 +79,13 @@ void Manager::ProcessSentence()
 
 	// seed hypothesis
 	{
-	Hypothesis *hypo = Hypothesis::Create(m_source, m_possibleTranslations.GetInitialCoverage());
-	TRACE_ERR(m_possibleTranslations.GetInitialCoverage().GetWordsCount() << endl);
+		Hypothesis *hypo = Hypothesis::Create(m_possibleTranslations.GetInitialCoverage());
+		TRACE_ERR(m_possibleTranslations.GetInitialCoverage().GetWordsCount() << endl);
 #ifdef N_BEST
-	LMList allLM = m_staticData.GetAllLM();
-	hypo->ResizeComponentScore(allLM, decodeStepList);
+		LMList allLM = m_staticData.GetAllLM();
+		hypo->ResizeComponentScore(allLM, decodeStepList);
 #endif
-	m_hypoStack[m_possibleTranslations.GetInitialCoverage().GetWordsCount()].AddPrune(hypo);
+		m_hypoStack[m_possibleTranslations.GetInitialCoverage().GetWordsCount()].AddPrune(hypo);
 	}
 	
 	// go thru each stack
@@ -176,7 +178,7 @@ void Manager::ProcessOneHypothesis(const list < DecodeStep > &decodeStepList, co
 	{
 		Hypothesis *hypo = *iterHypo;
 
-		hypo->CalcScore(m_staticData, m_possibleTranslations.GetFutureScore(), m_source);
+		hypo->CalcScore(m_staticData, m_possibleTranslations.GetFutureScore());
 		if(m_staticData.GetVerboseLevel() > 2) 
 		{			
 			hypo->PrintHypothesis(m_source, m_staticData.GetWeightDistortion(), m_staticData.GetWeightWordPenalty());
@@ -295,9 +297,8 @@ void Manager::ProcessTranslation(const Hypothesis &hypothesis, const DecodeStep
 
 	// actual implementation
 	const WordsRange &sourceWordsRange				= hypothesis.GetCurrSourceWordsRange();
-	const Phrase sourcePhrase 								= m_source.GetSubString(sourceWordsRange);
 	const PhraseDictionary &phraseDictionary	= decodeStep.GetPhraseDictionary();
-	const TargetPhraseCollection *phraseColl	=	phraseDictionary.FindEquivPhrase(sourcePhrase);
+	const TargetPhraseCollection *phraseColl	=	CreateTargetPhraseCollection(&phraseDictionary,&m_source,sourceWordsRange); 
 
 	if (phraseColl != NULL)
 	{
@@ -332,7 +333,7 @@ void Manager::ProcessTranslation(const Hypothesis &hypothesis, const DecodeStep
 
 				if (targetFactor == NULL)
 				{
-					const Factor *sourceFactor = sourcePhrase.GetFactor(0, factorType)
+					const Factor *sourceFactor = m_source.GetFactor(sourceWordsRange.GetStartPos(), factorType)
 											,*unkownfactor;
 					switch (factorType)
 					{
@@ -354,6 +355,7 @@ void Manager::ProcessTranslation(const Hypothesis &hypothesis, const DecodeStep
 
 }
 
+#if 0
 /***
  * Add to m_possibleTranslations all possible translations the phrase table gives us for
  * the given phrase
@@ -460,6 +462,7 @@ void Manager::CreateTranslationOptions(const Phrase &phrase, PhraseDictionary &p
 		}
 	}
 }
+#endif
 
 // helpers
 typedef pair<Word, float> WordPair;
diff --git a/moses/src/Manager.h b/moses/src/Manager.h
index da540974d..b49c22604 100755
--- a/moses/src/Manager.h
+++ b/moses/src/Manager.h
@@ -23,13 +23,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <vector>
 #include <list>
-#include "Sentence.h"
+#include "Input.h"
 #include "Hypothesis.h"
 #include "StaticData.h"
 #include "TranslationOption.h"
 #include "HypothesisCollection.h"
 #include "HypothesisCollectionIntermediate.h"
-#include "TranslationOptionCollection.h"
+#include "TranslationOptionCollectionText.h"
 #include "LatticePathList.h"
 #include "SquareMatrix.h"
 #include "WordsBitmap.h"
@@ -41,11 +41,12 @@ class Manager
 {
 protected:	
 	// data
-	Sentence m_source;
+	InputType const& m_source;
+
 	std::vector < HypothesisCollection > m_hypoStack;
 		// no of elements = no of words in source + 1
 	StaticData &m_staticData;
-	TranslationOptionCollection m_possibleTranslations;
+	TranslationOptionCollection& m_possibleTranslations;
 
 	// functions
 	void ProcessOneStack(const std::list < DecodeStep > &decodeStepList
@@ -61,13 +62,15 @@ protected:
 	void ProcessGeneration(const Hypothesis &hypothesis
 													, const DecodeStep &decodeStep
 													, HypothesisCollectionIntermediate &outputHypoColl);
+#if 0
 	void CreateTranslationOptions(const Phrase &phrase
 													, PhraseDictionary &phraseDictionary
 													, const LMList &lmListInitial);
+#endif
 	void OutputHypoStack(int stack = -1);
 	void OutputHypoStackSize();
 public:
-	Manager(const Sentence &sentence, StaticData &staticData);
+	Manager(InputType const& source, TranslationOptionCollection&, StaticData &staticData);
 	~Manager();
 
 	void ProcessSentence();
diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h
index d899fd2e3..d8c374279 100755
--- a/moses/src/PhraseDictionary.h
+++ b/moses/src/PhraseDictionary.h
@@ -29,8 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "Phrase.h"
 #include "TargetPhrase.h"
 #include "Dictionary.h"
-
-typedef std::list <TargetPhrase> TargetPhraseCollection;
+#include "CreateTargetPhraseCollection.h"
 
 class StaticData;
 
diff --git a/moses/src/PhraseDictionaryTree.cpp b/moses/src/PhraseDictionaryTree.cpp
index 00cea5399..c45b8b76d 100644
--- a/moses/src/PhraseDictionaryTree.cpp
+++ b/moses/src/PhraseDictionaryTree.cpp
@@ -47,12 +47,11 @@ public:
     std::pair<typename M::iterator,bool> p
 			=m.insert(std::make_pair(k,data.size()));
     if(p.second) data.push_back(k);
-    assert(p.first->second>=0);
 		assert(static_cast<size_t>(p.first->second)<data.size());
     return p.first->second;
   }
   const Key& symbol(LabelId i) const {
-    assert(i>=0);assert(static_cast<size_t>(i)<data.size());
+    assert(static_cast<size_t>(i)<data.size());
     return data[i];}
 
   typedef typename V::const_iterator const_iterator;
@@ -145,7 +144,7 @@ struct PDTimp {
   ObjectPool<PPimp> pPool;
 
 	PDTimp() : os(0),ot(0),m_factorCollection(0) {PTF::setDefault(InvalidOffT);}
-	~PDTimp() {if(os) fClose(os);if(ot) fClose(ot);}
+	~PDTimp() {if(os) fClose(os);if(ot) fClose(ot);FreeMemory();}
 
 	void FreeMemory() 
 	{
@@ -153,21 +152,15 @@ struct PDTimp {
 		pPool.reset();
 	}
 
-	int Read(const std::string& fn) ;
-
-	off_t FindOffT(const IPhrase& f) const 
-	{
-  	if(f.empty()) return InvalidOffT;
-  	if(f[0]>=data.size()) return InvalidOffT;
-  	if(data[f[0]]) return data[f[0]]->find(f); else return InvalidOffT;
-	}
+	int Read(const std::string& fn);
 	
 	void GetTargetCandidates(const IPhrase& f,TgtCands& tgtCands) 
 	{
-		off_t tCandOffset=FindOffT(f);
-		//		std::cerr<<"offset of tgtcand: "<<tCandOffset<<" "<<InvalidOffT<<" for phrase '"<<f<<"'\n";
-		if(tCandOffset==InvalidOffT) return;
-  	fSeek(ot,tCandOffset);
+		if(f.empty()) return;
+  	if(f[0]>=data.size()) return;
+  	if(!data[f[0]]) return;
+		assert(data[f[0]]->find(f)!=InvalidOffT);
+  	fSeek(ot,data[f[0]]->find(f));
    	tgtCands.readBin(ot);
 	}
 
@@ -183,6 +176,8 @@ struct PDTimp {
    	tgtCands.readBin(ot);
 	}
 	void PrintTgtCand(const TgtCands& tcands,std::ostream& out) const;
+
+	// convert target candidates from internal data structure to the external one
 	void ConvertTgtCand(const TgtCands& tcands,std::vector<FactorTgtCand>& rv,FactorType oft) const
 	{
 		for(TgtCands::const_iterator i=tcands.begin();i!=tcands.end();++i)
@@ -259,12 +254,12 @@ int PDTimp::Read(const std::string& fn)
 
 void PDTimp::PrintTgtCand(const TgtCands& tcand,std::ostream& out) const
 {
-		for(size_t i=0;i<tcand.size();++i) 
+	for(size_t i=0;i<tcand.size();++i) 
 		{
 		  out<<i<<" -- "<<tcand[i].GetScores()<<" -- ";
 		  const IPhrase& iphr=tcand[i].GetPhrase();
 		  for(size_t j=0;j<iphr.size();++j)
-			out<<tv.symbol(iphr[j])<<" ";
+				out<<tv.symbol(iphr[j])<<" ";
 		  out<<'\n';		
 		}
 }
@@ -455,7 +450,7 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
 		}
   
   FILE *oi=fOpen(ofi.c_str(),"wb");
-  size_t vob=fWriteVector(oi,vo);
+  fWriteVector(oi,vo);
 	fClose(oi);
 
 	imp->sv.Write(ofsv);
@@ -508,33 +503,36 @@ PhraseDictionaryTree::GetTargetCandidates(PrefixPtr p,
 //
 ////////////////////////////////////////////////////////////
 
+
+#include <numeric>
 #include "Word.h"
 #include "Phrase.h"
 #include "ConfusionNet.h"
+#include "WordsRange.h"
 
 // Generates all tuples from  n indexes with ranges 0 to card[j]-1, respectively..
 // Input: number of indexes and  ranges: ranges[0] ... ranges[num_idx-1] 
 // Output: number of tuples and monodimensional array of tuples.
 // Reference: mixed-radix generation algorithm (D. E. Knuth, TAOCP v. 4.2)
 
-size_t GenerateTuples(int num_idx,int* ranges,int *&tuples)
+size_t GenerateTuples(unsigned num_idx,unsigned* ranges,unsigned *&tuples)
 {
-  int* single_tuple=(int *) new int[num_idx+1];
-  int num_tuples=1;
+  unsigned* single_tuple= new unsigned[num_idx+1];
+  unsigned num_tuples=1;
 
-  for (int k=0;k<num_idx;++k)
+  for (unsigned k=0;k<num_idx;++k)
     {
       num_tuples *= ranges[k];
       single_tuple[k]=0;
     }
 
-  tuples=new int[num_idx * num_tuples];
+  tuples=new unsigned[num_idx * num_tuples];
 
   // we need this additional element for the last iteration
   single_tuple[num_idx]=0; 
-  int j=0;
-  for (int n=0;n<num_tuples;++n){
-    memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(int));
+  unsigned j=0;
+  for (unsigned n=0;n<num_tuples;++n){
+    memcpy((void *)((tuples + n * num_idx)),(void *)single_tuple,num_idx * sizeof(unsigned));
     j=0;
     while (single_tuple[j]==ranges[j]-1){single_tuple[j]=0; ++j;}
     ++single_tuple[j];
@@ -546,7 +544,6 @@ size_t GenerateTuples(int num_idx,int* ranges,int *&tuples)
 
 typedef PhraseDictionaryTree::PrefixPtr PPtr;
 typedef std::vector<PPtr> vPPtr;
-typedef std::pair<size_t,size_t> Range;
 typedef std::vector<std::vector<Factor const*> > mPhrase;
 
 std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
@@ -562,14 +559,14 @@ std::ostream& operator<<(std::ostream& out,const mPhrase& p) {
 
 struct State {
 	vPPtr ptrs;
-	Range range;
+	WordsRange range;
 	float score;
 
 	State() : range(0,0),score(0.0) {}
 	State(size_t b,size_t e,const vPPtr& v,float sc=0.0) : ptrs(v),range(b,e),score(sc) {}
 	
-	size_t begin() const {return range.first;}
-	size_t end() const {return range.second;}
+	size_t begin() const {return range.GetStartPos();}
+	size_t end() const {return range.GetEndPos();}
 	float GetScore() const {return score;}
 
 };
@@ -580,129 +577,199 @@ std::ostream& operator<<(std::ostream& out,const State& s) {
 	return out;
 }
 
+typedef std::map<mPhrase,float> E2Costs;
 
-void GenerateCandidates(const ConfusionNet& src,
-												std::vector<PhraseDictionaryTree const*>& pdicts) {
 
-	vPPtr root(pdicts.size());
-	std::vector<FactorType> inF(pdicts.size()),outF(pdicts.size());
-	for(size_t i=0;i<pdicts.size();++i) 
-	{
-		root[i]=pdicts[i]->GetRoot();
-		inF[i]=pdicts[i]->GetInputFactorType();
-		outF[i]=pdicts[i]->GetOutputFactorType();
+struct GCData {
+	const std::vector<PhraseDictionaryTree const*>& pdicts;
+	const std::vector<std::vector<float> >& weights;
+	std::vector<FactorType> inF,outF;
+	size_t distinctOutputFactors;
+	vPPtr root;
+	size_t totalTuples,distinctTuples;
+
+
+	GCData(const std::vector<PhraseDictionaryTree const*>& a,
+				 const std::vector<std::vector<float> >& b) 
+		: pdicts(a),weights(b),totalTuples(0),distinctTuples(0) {
+
+		assert(pdicts.size()==weights.size());
+		std::set<FactorType> distinctOutFset;
+		inF.resize(pdicts.size());
+		outF.resize(pdicts.size());
+		root.resize(pdicts.size());
+		for(size_t i=0;i<pdicts.size();++i) 
+			{
+				root[i]=pdicts[i]->GetRoot();
+				inF[i]=pdicts[i]->GetInputFactorType();
+				outF[i]=pdicts[i]->GetOutputFactorType();
+				distinctOutFset.insert(pdicts[i]->GetOutputFactorType());
+			}
+		distinctOutputFactors=distinctOutFset.size();
+	}
+
+	FactorType OutFT(size_t i) const {return outF[i];}
+	FactorType InFT(size_t i) const {return inF[i];}
+	size_t DistinctOutFactors() const {return distinctOutputFactors;}
+
+	const vPPtr& GetRoot() const {return root;}
+
+};
+
+typedef std::vector<Factor const*> vFactor;
+typedef std::vector<std::pair<float,vFactor> > TgtCandList;
+
+typedef std::vector<TgtCandList> OutputFactor2TgtCandList;
+typedef std::vector<OutputFactor2TgtCandList*> Len2Cands;
+
+void GeneratePerFactorTgtList(size_t factorType,PPtr pptr,GCData& data,Len2Cands& len2cands)
+{
+	std::vector<FactorTgtCand> cands;
+	data.pdicts[factorType]->GetTargetCandidates(pptr,cands);
+
+	for(std::vector<FactorTgtCand>::const_iterator cand=cands.begin();cand!=cands.end();++cand) {
+		assert(data.weights[factorType].size()==cand->second.size());
+		float costs=std::inner_product(data.weights[factorType].begin(),
+																	 data.weights[factorType].end(),
+																	 cand->second.begin(),
+																	 0.0);
+
+		size_t len=cand->first.size();
+		if(len>=len2cands.size()) len2cands.resize(len+1,0);
+		if(!len2cands[len]) len2cands[len]=new OutputFactor2TgtCandList(data.DistinctOutFactors());
+		OutputFactor2TgtCandList &outf2tcandlist=*len2cands[len];
+
+		outf2tcandlist[data.OutFT(factorType)].push_back(std::make_pair(costs,cand->first));
+	}
+}
+
+void GenerateTupleTgtCands(OutputFactor2TgtCandList& tCand,E2Costs& e2costs,GCData& data) 
+{
+	// check if candidates are non-empty
+	bool gotCands=1;
+	for(size_t j=0;gotCands && j<tCand.size();++j)
+		gotCands &= !tCand[j].empty();
+				
+	if(gotCands) {
+		// enumerate tuples
+		assert(data.DistinctOutFactors()==tCand.size());
+		std::vector<unsigned> radix(data.DistinctOutFactors());
+		for(size_t i=0;i<tCand.size();++i) radix[i]=tCand[i].size();
+
+		unsigned *tuples=0;
+		size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
+
+		data.totalTuples+=numTuples;
+
+		for(size_t i=0;i<numTuples;++i)
+			{
+				mPhrase e(radix.size());float costs=0.0;
+				for(size_t j=0;j<radix.size();++j)
+					{
+						assert(tuples[radix.size()*i+j]<tCand[j].size());
+						std::pair<float,vFactor> const& mycand=tCand[j][tuples[radix.size()*i+j]];
+						e[j]=mycand.second;
+						costs+=mycand.first;
+					}
+#ifdef DEBUG
+				bool mismatch=0;
+				for(size_t j=1;!mismatch && j<e.size();++j)
+					if(e[j].size()!=e[j-1].size()) mismatch=1;
+				assert(mismatch==0);
+#endif
+				std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(e,costs));
+				if(p.second) ++data.distinctTuples;
+				else {
+					// entry known, take min of costs, alternative: sum probs
+					if(costs<p.first->second) p.first->second=costs;
+				}
+			}
+		delete [] tuples;
 	}
+}
+
+void GenerateCandidates_(E2Costs& e2costs,const vPPtr& nextP,GCData& data) 
+{
+	Len2Cands len2cands;
+	// generate candidates for each element of nextP:
+	for(size_t factorType=0;factorType<nextP.size();++factorType) 
+		if(nextP[factorType]) 
+			GeneratePerFactorTgtList(factorType,nextP[factorType],data,len2cands);
+
+	// for each length: enumerate tuples, compute score, and insert in e2costs
+	for(size_t len=0;len<len2cands.size();++len) if(len2cands[len]) 
+		GenerateTupleTgtCands(*len2cands[len],e2costs,data);
+}
+
+void GenerateCandidates(const ConfusionNet& src,
+												const std::vector<PhraseDictionaryTree const*>& pdicts,
+												const std::vector<std::vector<float> >& weights) {
+	GCData data(pdicts,weights);
 
 	std::vector<State> stack;
-	for(size_t i=0;i<src.size();++i) stack.push_back(State(i,i,root));
-	
-	size_t totalTuples=0,distinctTuples=0,lengthMismatch=0;
+	for(size_t i=0;i<src.GetSize();++i) stack.push_back(State(i,i,data.GetRoot()));
 
-	std::map<Range,std::set<mPhrase> > cov2E;
+	std::map<WordsRange,E2Costs> cov2E;
 
-	std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
+	//	std::cerr<<"start while loop. initial stack size: "<<stack.size()<<"\n";
 
 	while(!stack.empty()) 
 	{
 		State curr(stack.back());
 		stack.pop_back();
 		
-		std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
+		//std::cerr<<"processing state "<<curr<<" stack size: "<<stack.size()<<"\n";
 
-		assert(curr.end()<src.size());
+		assert(curr.end()<src.GetSize());
 		const ConfusionNet::Column &currCol=src[curr.end()];
-		for(size_t i=0;i<currCol.size();++i) 
+		for(size_t colidx=0;colidx<currCol.size();++colidx) 
 		{
-			const Word& w=currCol[i].first;
+			const Word& w=currCol[colidx].first;
 			vPPtr nextP(curr.ptrs);
 			for(size_t j=0;j<nextP.size();++j)
-				nextP[j]=pdicts[j]->Extend(nextP[j],w.GetFactor(inF[j])->GetString());
+				nextP[j]=pdicts[j]->Extend(nextP[j],
+																	 w.GetFactor(data.InFT(j))->GetString());
 	
 			bool valid=1;
-			for(size_t j=0;valid && j<nextP.size();++j)
-				if(!nextP[j]) valid=0;
-			//				valid &= (nextP[j] ? 1 : 0);
-			
+			for(size_t j=0;j<nextP.size();++j) if(!nextP[j]) {valid=0;break;}
+
 			if(valid) 
 			{
-				if(curr.end()+1<src.size())
+				if(curr.end()+1<src.GetSize())
 					stack.push_back(State(curr.begin(),curr.end()+1,nextP,
-																curr.GetScore()+currCol[i].second));
-
-				
-				std::vector<std::vector<FactorTgtCand>* > tCand;
+																curr.GetScore()+currCol[colidx].second));
 
-				// generate candidates for each element of nextP:
-				for(size_t j=0;j<nextP.size();++j) if(nextP[j]) 
-				{
-					if(outF[j]>=tCand.size()) tCand.resize(outF[j]+1,0);
-					if(!tCand[outF[j]]) tCand[outF[j]]=new std::vector<FactorTgtCand>;
-					pdicts[j]->GetTargetCandidates(nextP[j],*(tCand[outF[j]]));
-				}
-				
-				// check if candidates are non-empty
-				bool gotCands=1;
-				for(size_t j=0;gotCands && j<tCand.size();++j)
-					gotCands &= tCand[j] && !tCand[j]->empty();
-				
-				if(gotCands) {
-					// enumerate tuples
-
-
-					std::vector<int> radix(tCand.size());
-					for(size_t i=0;i<tCand.size();++i) radix[i]=tCand[i]->size();
-
-					int *tuples;
-					size_t numTuples=GenerateTuples(radix.size(),&radix[0],tuples);
-
-					totalTuples+=numTuples;
-
-					for(size_t i=0;i<numTuples;++i)
-						{
-							mPhrase e(radix.size());
-							for(size_t j=0;j<radix.size();++j)
-								{
-									assert(tCand[j]); // should be superfluous, but ...
-									assert(tuples[radix.size()*i+j]<tCand[j]->size());
-									e[j]=(*tCand[j])[tuples[radix.size()*i+j]].first;
-								}
-
-							bool mismatch=0;
-							for(size_t j=1;!mismatch && j<e.size();++j)
-								if(e[j].size()!=e[j-1].size()) mismatch=1;
-
-							if(mismatch) ++lengthMismatch;
-							else if(cov2E[Range(curr.begin(),curr.end()+1)].insert(e).second) ++distinctTuples;
-						}
-
-
-					delete [] tuples;
-				}
-					
+				E2Costs &e2costs=cov2E[WordsRange(curr.begin(),curr.end()+1)];
+				GenerateCandidates_(e2costs,nextP,data);
 			}
 		}
 			
+		// check if there are translations of one-word phrases ...
 		//if(curr.begin()==curr.end() && tCand.empty()) {}		
-	}
 
-	std::cerr<<"tuple stats:  total: "<<totalTuples
-					 <<" distinct: "<<distinctTuples<<" ("<<(distinctTuples/(0.01*totalTuples))
-					 <<"%) lengthMismatch: "<<lengthMismatch<<" ("<<(lengthMismatch/(0.01*totalTuples))<<"%)\n";
+	} // end while(!stack.empty()) 
+
+	// print statistics for debugging purposes
+	std::cerr<<"tuple stats:  total: "<<data.totalTuples
+					 <<" distinct: "<<data.distinctTuples<<" ("
+					 <<(data.distinctTuples/(0.01*data.totalTuples))
+					 <<"%)\n";
 	std::cerr<<"per coverage set:\n";
-	for(std::map<Range,std::set<mPhrase> >::const_iterator i=cov2E.begin();i!=cov2E.end();++i) {
-		std::cerr<<i->first.first<<","<<i->first.second<<" -- distinct cands: "<<i->second.size()<<"\n";
+	for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+			i!=cov2E.end();++i) {
+		std::cerr<<i->first<<" -- distinct cands: "
+						 <<i->second.size()<<"\n";
 	}
 	std::cerr<<"\n\n";
 
 	std::cerr<<"full list:\n";
-	for(std::map<Range,std::set<mPhrase> >::const_iterator i=cov2E.begin();i!=cov2E.end();++i) {
-		std::cerr<<i->first.first<<","<<i->first.second<<" -- distinct cands: "<<i->second.size()<<"\n";
-		for(std::set<mPhrase>::const_iterator j=i->second.begin();j!=i->second.end();++j)
-			std::cerr<<*j<<"\n";
+	for(std::map<WordsRange,E2Costs>::const_iterator i=cov2E.begin();
+			i!=cov2E.end();++i) {
+		std::cerr<<i->first<<" -- distinct cands: "
+						 <<i->second.size()<<"\n";
+		for(E2Costs::const_iterator j=i->second.begin();j!=i->second.end();++j)
+			std::cerr<<j->first<<" -- "<<j->second<<"\n";
 	}
-
-
-
 }
 
 
diff --git a/moses/src/PhraseDictionaryTree.h b/moses/src/PhraseDictionaryTree.h
index fe85eec2c..b2728af7b 100644
--- a/moses/src/PhraseDictionaryTree.h
+++ b/moses/src/PhraseDictionaryTree.h
@@ -22,15 +22,6 @@ class PhraseDictionaryTree : public Dictionary {
 	PDTimp *imp; //implementation
 	FactorType m_inFactorType,m_outFactorType;
 public:
-
-	class PrefixPtr {
-		PPimp* imp;
-		friend class PDTimp;
-	public:
-		PrefixPtr(PPimp* x=0) : imp(x) {}
-		operator bool() const;
-	};
-
 	PhraseDictionaryTree(size_t noScoreComponent,
 											 FactorCollection* factorCollection=0,
 											 FactorType inputFactorType=Surface,
@@ -38,44 +29,73 @@ public:
 
 	virtual ~PhraseDictionaryTree();
 
-	DecodeType GetDecodeType() const
-	{
-		return Translate;
-	}
-	size_t GetSize() const
-	{
-		return 0;
-	}
-
+	DecodeType GetDecodeType() const {return Translate;}
+	size_t GetSize() const {return 0;}
 	FactorType GetInputFactorType() const {return m_inFactorType;}
 	FactorType GetOutputFactorType() const {return m_outFactorType;}
 	
 	// convert from ascii phrase table format 
-	int Create(std::istream& In,const std::string& OutFileNamePrefix);
-	int Read(const std::string& FileNamePrefix); 
+	// note: only creates table, does not keep it in memory
+	//        -> use Read(outFileNamePrefix);
+	int Create(std::istream& in,const std::string& outFileNamePrefix);
+
+	int Read(const std::string& fileNamePrefix); 
 
-	// free memory used by the prefix tree
+	// free memory used by the prefix tree etc.
 	void FreeMemory() const;
 
-	// access with full src phrase
+
+	/**************************************
+	 *   access with full source phrase   *
+	 **************************************/
+	// get the target candidates for a given factor sequence/phrase
 	void GetTargetCandidates(const std::vector<const Factor*>& src,
 													 std::vector<FactorTgtCand>& rv) const;
+
+	// print target candidates for a given phrase, mainly for debugging
 	void PrintTargetCandidates(const std::vector<std::string>& src,
 														 std::ostream& out) const;
 
-	// access to prefix tree
+
+
+	/*****************************
+	 *   access to prefix tree   *
+	 *****************************/
+
+	// 'pointer' into prefix tree
+	// the only permitted direct operation is a check for NULL,
+	// e.g. PrefixPtr p; if(p) ...
+	// other usage only through PhraseDictionaryTree-functions below
+
+	class PrefixPtr {
+		PPimp* imp;
+		friend class PDTimp;
+	public:
+		PrefixPtr(PPimp* x=0) : imp(x) {}
+		operator bool() const;
+	};
+
+	// return pointer to root node
 	PrefixPtr GetRoot() const;
-	PrefixPtr Extend(PrefixPtr,const std::string&) const;
+	// extend pointer with a word/Factorstring and return the resulting successor
+	// pointer. If there is no such successor node, the result will evaluate to 
+	// false. Requirement: the input pointer p evaluates to true.
+	PrefixPtr Extend(PrefixPtr p,const std::string& s) const;
 
+	// get the target candidates for a given prefix pointer
+	// requirement: the pointer has to evaluate to true
 	void GetTargetCandidates(PrefixPtr p,
 													 std::vector<FactorTgtCand>& rv) const;
-	void PrintTargetCandidates(PrefixPtr p,std::ostream& out) const;
 
+	// print target candidates for a given prefix pointer to a stream, mainly 
+	// for debugging
+	void PrintTargetCandidates(PrefixPtr p,std::ostream& out) const;
 };
 
 
 void GenerateCandidates(const ConfusionNet& src,
-												std::vector<PhraseDictionaryTree const*>& pdicts) ;
+												const std::vector<PhraseDictionaryTree const*>& pdicts,
+												const std::vector<std::vector<float> >& weights) ;
 
 
 #endif /*PHRASEDICTIONARYTREE_H_*/
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index 18d4916b0..f0a68af20 100755
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -25,32 +25,38 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <string>
 #include "Word.h"
 #include "Phrase.h"
+#include "Input.h"
+
+class WordsRangs;
 
 /***
  * a Sentence is a Phrase with an ID
  */
-class Sentence : public Phrase
+class Sentence : public Phrase, public InputType
 {
-protected:
-	long m_translationId;
-
+ protected:
 	Sentence()
-	{
-	}
-public:
-	Sentence(FactorDirection direction)
-		:Phrase(direction)
-	{
-	}
-
-	// for db stuff
-	long GetTranslationId()
-	{
-		return m_translationId;
-	}
-	void SetTranslationId(long translationId)
-	{	// for db stuff;
-		m_translationId = translationId;
-	}
+		{
+		}
+ public:
+	Sentence(FactorDirection direction)	: InputType(),Phrase(direction)
+		{
+		}
+
+	Phrase GetSubString(const WordsRange& r) const 
+		{
+			return Phrase::GetSubString(r);
+		}
+
+	const Factor* GetFactor(size_t pos, FactorType factorType) const
+		{
+			return Phrase::GetFactor(pos,factorType);
+		}
+
+	size_t GetSize() const 
+		{
+			return Phrase::GetSize();
+		}
+
 };
 
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 47890a390..37ab55b38 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -18,7 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 
 #include "TranslationOptionCollection.h"
-#include "Sentence.h"
+//#include "Sentence.h"
 #include "DecodeStep.h"
 #include "LanguageModel.h"
 #include "PhraseDictionary.h"
@@ -26,170 +26,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 using namespace std;
 
-TranslationOptionCollection::TranslationOptionCollection(const Sentence &inputSentence)
-	: m_inputSentence(inputSentence)
-	,m_futureScore(inputSentence.GetSize())
-	,m_initialCoverage(inputSentence.GetSize())
-  {
-  }
-
-void TranslationOptionCollection::CreateTranslationOptions(
-  														const list < DecodeStep > &decodeStepList
-  														, const LMList &languageModels
-  														, const LMList &allLM
-  														, FactorCollection &factorCollection
-  														, float weightWordPenalty
-  														, bool dropUnknown
-  														, size_t verboseLevel)
+TranslationOptionCollection::TranslationOptionCollection(size_t srcSize)
+	: m_futureScore(srcSize),m_initialCoverage(srcSize)
 {
-  // loop over all substrings of the source sentence, look them up
-  // in the phraseDictionary (which is the- possibly filtered-- phrase
-  // table loaded on initialization), generate TranslationOption objects
-  // for all phrases
-  //
-  // possible optimization- don't consider phrases longer than the longest
-  // phrase in the PhraseDictionary?
-  
-  PhraseDictionary &phraseDictionary = decodeStepList.front().GetPhraseDictionary();
-  for (size_t startPos = 0 ; startPos < m_inputSentence.GetSize() ; startPos++)
-    {
-      // reuse phrase, add next word on
-      Phrase sourcePhrase( m_inputSentence.GetDirection());
-
-      for (size_t endPos = startPos ; endPos < m_inputSentence.GetSize() ; endPos++)
-	{
-	  const WordsRange wordsRange(startPos, endPos);
-
-	  FactorArray &newWord = sourcePhrase.AddWord();
-	  Word::Copy(newWord, m_inputSentence.GetFactorArray(endPos));
-
-	  const TargetPhraseCollection *phraseColl =	phraseDictionary.FindEquivPhrase(sourcePhrase);
-	  if (phraseColl != NULL)
-	    {
-	      if (verboseLevel >= 3) {
-		cout << "[" << sourcePhrase << "; " << startPos << "-" << endPos << "]\n";
-	      }
-	      TargetPhraseCollection::const_iterator iterTargetPhrase;
-	      for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != phraseColl->end() ; ++iterTargetPhrase)
-		{
-		  const TargetPhrase	&targetPhrase = *iterTargetPhrase;
-					
-		  const WordsRange wordsRange(startPos, endPos);
-		  TranslationOption transOpt(wordsRange
-					     , targetPhrase);
-
-		  push_back(transOpt);
-		  if (verboseLevel >= 3) {
-		    cout << "\t" << transOpt << "\n";
-		  }
-		}
-	      if (verboseLevel >= 3) { cout << endl; }
-	    }
-	  else if (sourcePhrase.GetSize() == 1)
-	    {
-	      // unknown word, add to target, and add as poss trans
-	      //				float	weightWP		= m_staticData.GetWeightWordPenalty();
-	      const FactorTypeSet &targetFactors 		= phraseDictionary.GetFactorsUsed(Output);
-	      int isDigit = 0;
-	      if (dropUnknown)
-		{
-		  const Factor *f = sourcePhrase.GetFactor(0, static_cast<FactorType>(0)); // surface @ 0
-		  std::string s = f->ToString();
-		  isDigit = s.find_first_of("0123456789");
-		  if (isDigit == string::npos) isDigit = 0;
-		  else isDigit = 1;
-		  // modify the starting bitmap
-		}
-	      if (!dropUnknown || isDigit)
-		{
-		  // add to dictionary
-		  TargetPhrase targetPhraseOrig(Output, &phraseDictionary);
-		  FactorArray &targetWord = targetPhraseOrig.AddWord();
-		  
-		  const FactorArray &sourceWord = sourcePhrase.GetFactorArray(0);
-		  
-		  for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
-		    {
-		      if (targetFactors.Contains(currFactor))
-			{
-			  FactorType factorType = static_cast<FactorType>(currFactor);
-			  
-			  const Factor *factor = sourceWord[factorType]
-			    ,*unkownfactor;
-			  switch (factorType)
-			    {
-			    case POS:
-			      unkownfactor = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
-			      targetWord[factorType] = unkownfactor;
-			      break;
-			    default:
-			      unkownfactor = factorCollection.AddFactor(Output, factorType, factor->GetString());
-			      targetWord[factorType] = unkownfactor;
-			      break;
-			    }
-			}
-		    }
-		  
-		  targetPhraseOrig.SetScore(allLM, weightWordPenalty);
-		  
-		  phraseDictionary.AddEquivPhrase(sourcePhrase, targetPhraseOrig);
-		  const TargetPhraseCollection *phraseColl = phraseDictionary.FindEquivPhrase(sourcePhrase);
-		  const TargetPhrase &targetPhrase = *phraseColl->begin();
-		  
-		  TranslationOption transOpt(wordsRange, targetPhrase);
-		  
-		  push_back(transOpt);
-		}
-	      else // drop source word
-		{ m_initialCoverage.SetValue(startPos, startPos,1); }
-	    }
-	}
-    }
-
-  // create future score matrix
-  // for each span in the source phrase (denoted by start and end)
-  for(size_t startPos = 0; startPos < m_inputSentence.GetSize() ; startPos++) 
-    {
-      for(size_t endPos = startPos; endPos < m_inputSentence.GetSize() ; endPos++) 
-	{
-	  size_t length = endPos - startPos + 1;
-	  vector< float > score(length + 1);
-	  score[0] = 0;
-	  for(size_t currLength = 1 ; currLength <= length ; currLength++) 
-	    // initalize their future cost to -infinity
-	    {
-	      score[currLength] = - numeric_limits<float>::infinity();
-	    }
-
-	  for(size_t currLength = 0 ; currLength < length ; currLength++) 
-	    {
-	      // iterate over possible translations of this source subphrase and
-	      // keep track of the highest cost option
-	      TranslationOptionCollection::const_iterator iterTransOpt;
-	      for(iterTransOpt = begin() ; iterTransOpt != end() ; ++iterTransOpt)
-		{
-		  const TranslationOption &transOpt = *iterTransOpt;
-		  size_t index = currLength + transOpt.GetSize();
-
-		  if (transOpt.GetStartPos() == currLength + startPos 
-		      && transOpt.GetEndPos() <= endPos 
-		      && transOpt.GetFutureScore() + score[currLength] > score[index]) 
-		    {
-		      score[index] = transOpt.GetFutureScore() + score[currLength];
-		    }
-		}
-	    }
-	  // record the highest cost option in the future cost table.
-	  m_futureScore.SetScore(startPos, endPos, score[length]);
-
-	  //print information about future cost table when verbose option is set
-
-	  if(verboseLevel > 0) 
-	    {		
-	      cout<<"future cost from "<<startPos<<" to "<<endPos<<" is "<<score[length]<<endl;
-	    }
-	}
-    }
-
 }
 
+TranslationOptionCollection::~TranslationOptionCollection() {}
+
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index 57183bde5..37709018b 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -27,28 +27,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "SquareMatrix.h"
 #include "WordsBitmap.h"
 
-class Sentence;
 class DecodeStep;
 class LanguageModel;
 class FactorCollection;
 
 class TranslationOptionCollection : public std::list< TranslationOption >
 {
+	TranslationOptionCollection(const TranslationOptionCollection&); // no copy constructor
 protected:
-	const Sentence &m_inputSentence;
 	SquareMatrix m_futureScore;
 	WordsBitmap m_initialCoverage;
+
+	TranslationOptionCollection(size_t srcSize);
 	
 public:
-	TranslationOptionCollection(const Sentence &inputSentence);
-  
-  void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
+  virtual ~TranslationOptionCollection();
+
+  virtual void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList
   														, const LMList &languageModels  														
   														, const LMList &allLM
   														, FactorCollection &factorCollection
   														, float weightWordPenalty
   														, bool dropUnknown
-  														, size_t verboseLevel);
+  														, size_t verboseLevel) =0;
+	// get length/size of source input
+	virtual size_t GetSourceSize() const=0;
+
+
 	inline const SquareMatrix &GetFutureScore()
 	{
 		return m_futureScore;
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp
new file mode 100644
index 000000000..e7854cd6a
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionConfusionNet.cpp
@@ -0,0 +1,25 @@
+// $Id$
+#include "TranslationOptionCollectionConfusionNet.h"
+#include "ConfusionNet.h"
+#include "DecodeStep.h"
+#include "LanguageModel.h"
+#include "PhraseDictionary.h"
+#include "FactorCollection.h"
+
+TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(const ConfusionNet &input) 
+	: TranslationOptionCollection(input.GetSize()),m_inputCN(input) {}
+
+size_t TranslationOptionCollectionConfusionNet::GetSourceSize() const 
+{
+	return m_inputCN.GetSize();
+}
+void TranslationOptionCollectionConfusionNet::
+CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList,
+												 const LMList &languageModels,
+												 const LMList &allLM,
+												 FactorCollection &factorCollection,
+												 float weightWordPenalty,
+												 bool dropUnknown,
+												 size_t verboseLevel)
+{
+}
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.h b/moses/src/TranslationOptionCollectionConfusionNet.h
new file mode 100644
index 000000000..938fba121
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionConfusionNet.h
@@ -0,0 +1,23 @@
+// $Id$
+#ifndef TRANSLATIONOPTIONCOLLECTIONCONFUSIONNET_H_
+#define TRANSLATIONOPTIONCOLLECTIONCONFUSIONNET_H_
+#include "TranslationOptionCollection.h"
+
+class ConfusionNet;
+
+class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection {
+	const ConfusionNet &m_inputCN;
+ public:
+	TranslationOptionCollectionConfusionNet(const ConfusionNet &input);
+
+	void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList,
+																const LMList &languageModels,
+																const LMList &allLM,
+																FactorCollection &factorCollection,
+																float weightWordPenalty,
+																bool dropUnknown,
+																size_t verboseLevel);
+
+	size_t GetSourceSize() const;
+};
+#endif
diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp
new file mode 100644
index 000000000..ec971458e
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionText.cpp
@@ -0,0 +1,178 @@
+// $Id$
+#include "TranslationOptionCollectionText.h"
+#include "Sentence.h"
+#include "DecodeStep.h"
+#include "LanguageModel.h"
+#include "PhraseDictionary.h"
+#include "FactorCollection.h"
+
+
+TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence) 
+	: TranslationOptionCollection(inputSentence.GetSize()),m_inputSentence(inputSentence) {}
+
+size_t TranslationOptionCollectionText::GetSourceSize() const 
+{
+	return m_inputSentence.GetSize();
+}
+
+
+void TranslationOptionCollectionText::
+CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList,
+												 const LMList &languageModels,
+												 const LMList &allLM,
+												 FactorCollection &factorCollection,
+												 float weightWordPenalty,
+												 bool dropUnknown,
+												 size_t verboseLevel)
+{
+  // loop over all substrings of the source sentence, look them up
+  // in the phraseDictionary (which is the -- possibly filtered -- phrase
+  // table loaded on initialization), generate TranslationOption objects
+  // for all phrases
+  //
+  // possible optimization: don't consider phrases longer than the longest
+  // phrase in the PhraseDictionary?
+  
+  PhraseDictionary &phraseDictionary = decodeStepList.front().GetPhraseDictionary();
+  for (size_t startPos = 0 ; startPos < m_inputSentence.GetSize() ; startPos++)
+    {
+      // reuse phrase, add next word on
+      Phrase sourcePhrase( m_inputSentence.GetDirection());
+
+      for (size_t endPos = startPos ; endPos < m_inputSentence.GetSize() ; endPos++)
+				{
+					const WordsRange wordsRange(startPos, endPos);
+
+					FactorArray &newWord = sourcePhrase.AddWord();
+					Word::Copy(newWord, m_inputSentence.GetFactorArray(endPos));
+
+					const TargetPhraseCollection *phraseColl =	phraseDictionary.FindEquivPhrase(sourcePhrase);
+					if (phraseColl != NULL)
+						{
+							if (verboseLevel >= 3) {
+								std::cout << "[" << sourcePhrase << "; " << startPos << "-" << endPos << "]\n";
+							}
+							TargetPhraseCollection::const_iterator iterTargetPhrase;
+							for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != phraseColl->end() ; ++iterTargetPhrase)
+								{
+									const TargetPhrase	&targetPhrase = *iterTargetPhrase;
+					
+									const WordsRange wordsRange(startPos, endPos);
+									TranslationOption transOpt(wordsRange
+																						 , targetPhrase);
+
+									push_back(transOpt);
+									if (verboseLevel >= 3) {
+										std::cout << "\t" << transOpt << "\n";
+									}
+								}
+							if (verboseLevel >= 3) { std::cout << std::endl; }
+						}
+					else if (sourcePhrase.GetSize() == 1)
+						{
+							// unknown word: add it to the target side and add it as a possible translation
+							//				float	weightWP		= m_staticData.GetWeightWordPenalty();
+							const FactorTypeSet &targetFactors 		= phraseDictionary.GetFactorsUsed(Output);
+							int isDigit = 0;
+							if (dropUnknown)
+								{
+									const Factor *f = sourcePhrase.GetFactor(0, static_cast<FactorType>(0)); // surface @ 0
+									std::string s = f->ToString();
+									isDigit = s.find_first_of("0123456789");
+									if (isDigit == std::string::npos) isDigit = 0;
+									else isDigit = 1;
+									// modify the starting bitmap
+								}
+							if (!dropUnknown || isDigit)
+								{
+									// add to dictionary
+									TargetPhrase targetPhraseOrig(Output, &phraseDictionary);
+									FactorArray &targetWord = targetPhraseOrig.AddWord();
+		  
+									const FactorArray &sourceWord = sourcePhrase.GetFactorArray(0);
+		  
+									for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
+										{
+											if (targetFactors.Contains(currFactor))
+												{
+													FactorType factorType = static_cast<FactorType>(currFactor);
+			  
+													const Factor *factor = sourceWord[factorType]
+														,*unkownfactor;
+													switch (factorType)
+														{
+														case POS:
+															unkownfactor = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
+															targetWord[factorType] = unkownfactor;
+															break;
+														default:
+															unkownfactor = factorCollection.AddFactor(Output, factorType, factor->GetString());
+															targetWord[factorType] = unkownfactor;
+															break;
+														}
+												}
+										}
+		  
+									targetPhraseOrig.SetScore(allLM, weightWordPenalty);
+		  
+									phraseDictionary.AddEquivPhrase(sourcePhrase, targetPhraseOrig);
+									const TargetPhraseCollection *phraseColl = phraseDictionary.FindEquivPhrase(sourcePhrase);
+									const TargetPhrase &targetPhrase = *phraseColl->begin();
+		  
+									TranslationOption transOpt(wordsRange, targetPhrase);
+		  
+									push_back(transOpt);
+								}
+							else // drop source word
+								{ m_initialCoverage.SetValue(startPos, startPos,1); }
+						}
+				}
+    }
+
+  // create future score matrix
+  // for each span in the source phrase (denoted by start and end)
+  for(size_t startPos = 0; startPos < m_inputSentence.GetSize() ; startPos++) 
+    {
+      for(size_t endPos = startPos; endPos < m_inputSentence.GetSize() ; endPos++) 
+				{
+					size_t length = endPos - startPos + 1;
+					std::vector< float > score(length + 1);
+					score[0] = 0;
+					for(size_t currLength = 1 ; currLength <= length ; currLength++) 
+						// initialize their future cost to -infinity
+						{
+							score[currLength] = - std::numeric_limits<float>::infinity();
+						}
+
+					for(size_t currLength = 0 ; currLength < length ; currLength++) 
+						{
+							// iterate over possible translations of this source subphrase and
+							// keep track of the highest cost option
+							TranslationOptionCollection::const_iterator iterTransOpt;
+							for(iterTransOpt = begin() ; iterTransOpt != end() ; ++iterTransOpt)
+								{
+									const TranslationOption &transOpt = *iterTransOpt;
+									size_t index = currLength + transOpt.GetSize();
+
+									if (transOpt.GetStartPos() == currLength + startPos 
+											&& transOpt.GetEndPos() <= endPos 
+											&& transOpt.GetFutureScore() + score[currLength] > score[index]) 
+										{
+											score[index] = transOpt.GetFutureScore() + score[currLength];
+										}
+								}
+						}
+					// record the highest cost option in the future cost table.
+					m_futureScore.SetScore(startPos, endPos, score[length]);
+
+					//print information about future cost table when verbose option is set
+
+					if(verboseLevel > 0) 
+						{		
+							std::cout<<"future cost from "<<startPos<<" to "<<endPos<<" is "<<score[length]<<std::endl;
+						}
+				}
+    }
+
+}
+
diff --git a/moses/src/TranslationOptionCollectionText.h b/moses/src/TranslationOptionCollectionText.h
new file mode 100644
index 000000000..2b085722e
--- /dev/null
+++ b/moses/src/TranslationOptionCollectionText.h
@@ -0,0 +1,22 @@
+// $Id$
+#ifndef TRANSLATIONOPTIONCOLLECTIONTEXT_H_
+#define TRANSLATIONOPTIONCOLLECTIONTEXT_H_
+#include "TranslationOptionCollection.h"
+#include "Sentence.h"
+
+class TranslationOptionCollectionText : public TranslationOptionCollection {
+	Sentence const& m_inputSentence;
+ public:
+	TranslationOptionCollectionText(Sentence const& inputSentence);
+
+	void CreateTranslationOptions(const std::list < DecodeStep > &decodeStepList,
+																const LMList &languageModels,
+																const LMList &allLM,
+																FactorCollection &factorCollection,
+																float weightWordPenalty,
+																bool dropUnknown,
+																size_t verboseLevel);
+
+	size_t GetSourceSize() const;
+};
+#endif
diff --git a/moses/src/WordsRange.h b/moses/src/WordsRange.h
index f06cf5633..aa3b4c281 100755
--- a/moses/src/WordsRange.h
+++ b/moses/src/WordsRange.h
@@ -53,6 +53,10 @@ public:
 	{
 		return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1;
 	}
-
+	inline bool operator<(const WordsRange& x) const 
+	{
+		return (m_startPos<x.m_startPos 
+						|| (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
+	}
 };
author	zens <zens@1f5c12ca-751b-0410-a591-d2e778427230>	2006-07-19 04:16:29 +0400
committer	zens <zens@1f5c12ca-751b-0410-a591-d2e778427230>	2006-07-19 04:16:29 +0400
commit	648bd1dfcdc337f15ddf88823515e0cbedf62bd4 (patch)
tree	893e3a23ce02ef0dd1f356498fd2bb04feaa2014 /moses
parent	5449e11bb91459dc84bb07ff26ad7143b47df420 (diff)