Get rid of FactorArrayWrapper/FactorArray and use only Word. Memory pool is currently disabled, but the net cleanup resulted in better performance despite this (there are fewer copies of FactorArray -> Word now).

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@827 1f5c12ca-751b-0410-a591-d2e778427230
author: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-09-26 01:35:10 +0400
committer: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-09-26 01:35:10 +0400
commit: 441b147b3531bbefb53d9290dbf7595206e9a423 (patch)
tree: fe727a99134e7008e92ce90c9489cc6c1076226a /moses
parent: 27fe63219404d339ebd406da2cbee1703e84ae8e (diff)
35 files changed, 232 insertions, 402 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
index 0b29cc35d..aa5062a10 100644
--- a/moses/src/ConfusionNet.cpp
+++ b/moses/src/ConfusionNet.cpp
@@ -55,7 +55,7 @@ ConfusionNet::ConfusionNet(Sentence const& s)
 {
 	data.resize(s.GetSize());
 	for(size_t i=0;i<s.GetSize();++i)
-		data[i].push_back(std::make_pair(Word(s.GetFactorArray(i)),0.0));
+		data[i].push_back(std::make_pair(s.GetWord(i),0.0));
 }
 
 
@@ -185,7 +185,7 @@ std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint)
 	return "";
 }
 #pragma warning(disable:4716)
-const FactorArray& ConfusionNet::GetFactorArray(size_t) const {
+const Word& ConfusionNet::GetWord(size_t) const {
 	std::cerr<<"ERROR: call to ConfusionNet::GetFactorArray\n";
 	abort();
 }
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
index 2e5aaa673..03101e829 100644
--- a/moses/src/ConfusionNet.h
+++ b/moses/src/ConfusionNet.h
@@ -41,7 +41,7 @@ class ConfusionNet : public InputType {
 	
 	Phrase GetSubString(const WordsRange&) const; //TODO not defined
 	std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
-	const FactorArray& GetFactorArray(size_t pos) const;
+	const Word& GetWord(size_t pos) const;
 
 
 	TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const;
diff --git a/moses/src/DecodeStepGeneration.cpp b/moses/src/DecodeStepGeneration.cpp
index 0f9bb5f4d..998e6dc98 100644
--- a/moses/src/DecodeStepGeneration.cpp
+++ b/moses/src/DecodeStepGeneration.cpp
@@ -107,10 +107,10 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
     {
       // generatable factors for this word to be put in wordList
       WordList &wordList = wordListVector[wordListVectorPos];
-      const FactorArray &factorArray = targetPhrase.GetFactorArray(currPos);
+      const Word &word = targetPhrase.GetWord(currPos);
 
       // consult dictionary for possible generations for this word
-      const OutputWordCollection *wordColl = generationDictionary.FindWord(factorArray);
+      const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
 
       if (wordColl == NULL)
         { // word not found in generation dictionary
diff --git a/moses/src/FactorArrayWrapper.cpp b/moses/src/FactorArrayWrapper.cpp
deleted file mode 100644
index f205a4a2c..000000000
--- a/moses/src/FactorArrayWrapper.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-***********************************************************************/
-
-#include "FactorArrayWrapper.h"
-#include "Util.h"
-#include "Word.h"
-
-using namespace std;
-
-FactorArrayWrapper::~FactorArrayWrapper() {}
-
-int FactorArrayWrapper::Compare(const FactorArrayWrapper &compare) const
-{
-	return Compare(GetFactorArray(), compare.GetFactorArray());
-}
-
-// static functions
-int FactorArrayWrapper::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
-{
-	for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
-	{
-		const Factor *targetFactor		= targetWord[factorType]
-								,*sourceFactor	= sourceWord[factorType];
-					
-		if (targetFactor == NULL || sourceFactor == NULL)
-		{
-			continue;
-		}
-		int result = targetFactor->Compare(*sourceFactor);
-		if ( result )
-			return result;
-	}
-	return 0;
-
-}
-
-TO_STRING_BODY(FactorArrayWrapper);
-
-// friend
-ostream& operator<<(ostream& out, const FactorArrayWrapper& wrapper)
-{	
-	out << Word::ToString(*wrapper.m_factorArrayPtr);
-	return out;
-}
diff --git a/moses/src/FactorArrayWrapper.h b/moses/src/FactorArrayWrapper.h
deleted file mode 100644
index 740dfbdeb..000000000
--- a/moses/src/FactorArrayWrapper.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include "TypeDef.h"
-#include "Factor.h"
-
-class FactorArrayWrapper
-{
-	friend std::ostream& operator<<(std::ostream&, const FactorArrayWrapper&);
-	
-protected:
-	const FactorArray *m_factorArrayPtr;
-public:
-	FactorArrayWrapper() {}
-	FactorArrayWrapper(const FactorArray &factorArray)
-		:m_factorArrayPtr(&factorArray) {}
-	virtual ~FactorArrayWrapper();
-
-	FactorArrayWrapper& operator=(const FactorArrayWrapper &other)
-	{
-		if(this != &other)
-		{
-			m_factorArrayPtr = other.m_factorArrayPtr;
-		}
-		return *this;
-	}
-
-	const Factor *operator[](size_t index) const
-	{
-		return (*m_factorArrayPtr)[index];
-	}
-
-	virtual const FactorArray &GetFactorArray() const
-	{
-		return *m_factorArrayPtr;
-	}
-
-	inline const Factor *GetFactor(FactorType factorType) const
-	{
-		return (*m_factorArrayPtr)[factorType];
-	}
-
-	int Compare(const FactorArrayWrapper &compare) const;
-		// -1 = less than
-		// +1 = more than
-		// 0	= same
-	
-	inline bool operator< (const FactorArrayWrapper &compare) const
-	{ // needed to store word in GenerationDictionary map
-		// uses comparison of FactorKey
-		// 'proper' comparison, not address/id comparison
-		return Compare(compare) < 0;
-	}
-
-	TO_STRING;
-
-	//statics
-	static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
-
-};
-
diff --git a/moses/src/GenerationDictionary.cpp b/moses/src/GenerationDictionary.cpp
index f5fc85691..4e91fdbfd 100755
--- a/moses/src/GenerationDictionary.cpp
+++ b/moses/src/GenerationDictionary.cpp
@@ -62,7 +62,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
 		exit(1);
 	}
 
-  m_filename = filePath;
+	m_filename = filePath;
 	string line;
 	size_t lineNum = 0;
 	while(getline(inFile, line)) 
@@ -71,7 +71,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
 		vector<string> token = Tokenize( line );
 		
 		// add each line in generation file into class
-		Word *inputWord = new Word();
+		Word *inputWord = new Word();  // deleted in destructor
 		Word outputWord;
 
 		// create word with certain factors filled out
@@ -113,7 +113,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
 
 GenerationDictionary::~GenerationDictionary()
 {
-	std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator iter;
+	std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator iter;
 	for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
 	{
 		delete iter->first;
@@ -130,13 +130,12 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const
   return "Generation score, file=" + m_filename;
 }
 
-const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
+const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
 {
 	const OutputWordCollection *ret;
 	
-	FactorArrayWrapper wrapper(factorArray);
-	std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator 
-				iter = m_collection.find(&wrapper);
+	std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator 
+				iter = m_collection.find(&word);
 	if (iter == m_collection.end())
 	{ // can't find source phrase
 		ret = NULL;
diff --git a/moses/src/GenerationDictionary.h b/moses/src/GenerationDictionary.h
index 9248fdd3c..d97f5d679 100755
--- a/moses/src/GenerationDictionary.h
+++ b/moses/src/GenerationDictionary.h
@@ -31,10 +31,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 class FactorCollection;
 
-struct FactorArrayWrapperComparer
+struct WordComparer
 {
 	//! returns true if hypoA can be recombined with hypoB
-	bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const
+	bool operator()(const Word *a, const Word *b) const
 	{
 		return *a < *b;
 	}
@@ -47,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
 class GenerationDictionary : public Dictionary, public ScoreProducer
 {
 protected:
-	std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection;
+	std::map<const Word* , OutputWordCollection, WordComparer> m_collection;
 	// 1st = source
 	// 2nd = target
 	std::string						m_filename;
@@ -75,6 +75,6 @@ public:
 	{
 		return m_collection.size();
 	}
-	const OutputWordCollection *FindWord(const FactorArray &factorArray) const;
+	const OutputWordCollection *FindWord(const Word &word) const;
 };
 
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index c1133dd4c..0365fa61d 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -214,14 +214,14 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
 				(*_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetWordsCount(), 0);
 
 			// 1st n-gram
-			vector<FactorArrayWrapper> contextFactor(nGramOrder);
+			vector<const Word*> contextFactor(nGramOrder);
 			size_t index = 0;
 			for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
 			{
 				if (currPos >= 0)
-					contextFactor[index++] = GetFactorArray(currPos);
+					contextFactor[index++] = &GetWord(currPos);
 				else			
-					contextFactor[index++] = languageModel.GetSentenceStartArray();
+					contextFactor[index++] = &languageModel.GetSentenceStartArray();
 			}
 			lmScore	= languageModel.GetValue(contextFactor);
 			if (_lmstats) { languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]); }
@@ -237,7 +237,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
 					contextFactor[i] = contextFactor[i + 1];
 	
 				// add last factor
-				contextFactor.back() = GetFactorArray(currPos);
+				contextFactor.back() = &GetWord(currPos);
 
 				lmScore	+= languageModel.GetValue(contextFactor);
 				if (_lmstats) 
@@ -249,15 +249,15 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
 			if (m_sourceCompleted.IsComplete())
 			{
 				const size_t size = GetSize();
-				contextFactor.back() = languageModel.GetSentenceEndArray();
+				contextFactor.back() = &languageModel.GetSentenceEndArray();
 	
 				for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
 				{
 					int currPos = size - nGramOrder + i + 1;
 					if (currPos < 0)
-						contextFactor[i] = languageModel.GetSentenceStartArray();
+						contextFactor[i] = &languageModel.GetSentenceStartArray();
 					else
-						contextFactor[i] = GetFactorArray((size_t)currPos);
+						contextFactor[i] = &GetWord((size_t)currPos);
 				}
 				if (_lmstats) {
 					(*_lmstats)[lmIdx].resize((*_lmstats)[lmIdx].size() + 1); // extra space for the last call
@@ -268,7 +268,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
 				for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
 					for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
 						contextFactor[i] = contextFactor[i + 1];
-					contextFactor.back() = GetFactorArray(currPos);
+					contextFactor.back() = &GetWord(currPos);
 					if (_lmstats)
 						languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]);
 				}
@@ -437,7 +437,8 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor
 std::string Hypothesis::GetSourcePhraseStringRep() const 
 {
 	vector<FactorType> allFactors;
-	for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+	const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input);
+	for(size_t i=0; i < maxSourceFactors; i++)
 	{
 		allFactors.push_back(i);
 	}
@@ -446,7 +447,8 @@ std::string Hypothesis::GetSourcePhraseStringRep() const
 std::string Hypothesis::GetTargetPhraseStringRep() const 
 {
 	vector<FactorType> allFactors;
-	for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+	const size_t maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output);
+	for(size_t i=0; i < maxTargetFactors; i++)
 	{
 		allFactors.push_back(i);
 	}
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 26694ad4d..56bc1a0fe 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -1,4 +1,5 @@
 // $Id$
+// vim:tabstop=2
 
 /***********************************************************************
 Moses - factored phrase-based language decoder
@@ -167,19 +168,19 @@ public:
 	std::string GetSourcePhraseStringRep() const;
 	std::string GetTargetPhraseStringRep() const;
 
-	// curr - pos is relative from CURRENT hypothesis's starting ind ex
-  // (ie, start of sentence would be some negative number, which is
-  // not allowed- USE WITH CAUTION)
-	inline const FactorArray &GetCurrFactorArray(size_t pos) const
+	/** curr - pos is relative from CURRENT hypothesis's starting index
+	 * (ie, start of sentence would be some negative number, which is
+	 * not allowed- USE WITH CAUTION) */
+	inline const Word &GetCurrWord(size_t pos) const
 	{
-		return m_targetPhrase.GetFactorArray(pos);
+		return m_targetPhrase.GetWord(pos);
 	}
 	inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
 	{
 		return m_targetPhrase.GetFactor(pos, factorType);
 	}
-	// recursive - pos is relative from start of sentence
-	inline const FactorArray &GetFactorArray(size_t pos) const
+	/** recursive - pos is relative from start of sentence */
+	inline const Word &GetWord(size_t pos) const
 	{
 		const Hypothesis *hypo = this;
 		while (pos < hypo->GetCurrTargetWordsRange().GetStartPos())
@@ -187,11 +188,11 @@ public:
 			hypo = hypo->GetPrevHypo();
 			assert(hypo != NULL);
 		}
-		return hypo->GetCurrFactorArray(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
+		return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
 	}
 	inline const Factor* GetFactor(size_t pos, FactorType factorType) const
 	{
-		return GetFactorArray(pos)[factorType];
+		return GetWord(pos)[factorType];
 	}
 
 	/***
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
index 6681caa3e..0e3f4f8a5 100644
--- a/moses/src/InputType.h
+++ b/moses/src/InputType.h
@@ -42,7 +42,7 @@ protected:
 
 	virtual Phrase GetSubString(const WordsRange&) const =0;
 //	virtual std::string GetStringRep(const WordsRange&) const=0;
-	virtual const FactorArray& GetFactorArray(size_t pos) const=0;
+	virtual const Word& GetWord(size_t pos) const=0;
 	
 	TO_STRING;
 	
diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp
index 9398e2513..01336700d 100755
--- a/moses/src/LanguageModel.cpp
+++ b/moses/src/LanguageModel.cpp
@@ -39,9 +39,6 @@ LanguageModel::LanguageModel(bool registerScore)
 {
 	if (registerScore)
 		const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
-		
-	Word::Initialize(m_sentenceStartArray);
-	Word::Initialize(m_sentenceEndArray);
 }
 LanguageModel::~LanguageModel() {}
 
@@ -59,19 +56,19 @@ void LanguageModel::CalcScore(const Phrase &phrase
 	ngramScore	= 0;
 
 	size_t phraseSize = phrase.GetSize();
-	vector<FactorArrayWrapper> contextFactor;
+	vector<const Word*> contextFactor;
 	contextFactor.reserve(m_nGramOrder);
 
 	// start of sentence
 	for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++)
 	{
-		contextFactor.push_back(phrase.GetFactorArray(currPos));		
+		contextFactor.push_back(&phrase.GetWord(currPos));		
 		fullScore += GetValue(contextFactor);
 	}
 	
 	if (phraseSize >= m_nGramOrder)
 	{
-		contextFactor.push_back(phrase.GetFactorArray(m_nGramOrder - 1));
+		contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1));
 		ngramScore = GetValue(contextFactor);
 	}
 	
@@ -82,14 +79,14 @@ void LanguageModel::CalcScore(const Phrase &phrase
 		{
 			contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
 		}
-		contextFactor[m_nGramOrder - 1] = phrase.GetFactorArray(currPos);
+		contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos);
 		float partScore = GetValue(contextFactor);		
 		ngramScore += partScore;		
 	}
 	fullScore += ngramScore;	
 }
 
-LanguageModel::State LanguageModel::GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len) const
+LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const
 {
   State state;
 	unsigned int dummy;
diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h
index ad3d48bf4..dd21c133d 100755
--- a/moses/src/LanguageModel.h
+++ b/moses/src/LanguageModel.h
@@ -39,7 +39,7 @@ protected:
 	float				m_weight;
 	std::string	m_filename;
 	size_t			m_nGramOrder;
-	FactorArray m_sentenceStartArray, m_sentenceEndArray;
+	Word m_sentenceStartArray, m_sentenceEndArray;
 
 	LanguageModel(bool registerScore);
 
@@ -59,19 +59,19 @@ public:
 	void CalcScore(const Phrase &phrase
 							, float &fullScore
 							, float &ngramScore) const;
-	virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
+	virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
 
-	State GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len = 0) const;
+	State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const;
 
 	size_t GetNGramOrder() const
 	{
 		return m_nGramOrder;
 	}
-	const FactorArray &GetSentenceStartArray() const
+	const Word &GetSentenceStartArray() const
 	{
 		return m_sentenceStartArray;
 	}
-	const FactorArray &GetSentenceEndArray() const
+	const Word &GetSentenceEndArray() const
 	{
 		return m_sentenceEndArray;
 	}
diff --git a/moses/src/LanguageModelChunking.h b/moses/src/LanguageModelChunking.h
index 3c05f2aa4..fd1c0c224 100644
--- a/moses/src/LanguageModelChunking.h
+++ b/moses/src/LanguageModelChunking.h
@@ -64,7 +64,7 @@ public:
 		m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder);
 	}
 			
-	float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
+	float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
 	{
 		if (contextFactor.size() == 0)
 		{
@@ -76,30 +76,30 @@ public:
 		TRACE_ERR(std::endl);
 		*/
 		// only process context where last word is a word we want
-		const Factor *factor = contextFactor.back()[m_factorType];
+		const Factor *factor = (*contextFactor.back())[m_factorType];
 		std::string strWord = factor->GetString();
 		if (strWord.find("???") == 0)
 			return 0;
 		
 		// add last word
-		std::vector<FactorArrayWrapper> chunkContext;
-		Word chunkWord;
-		chunkWord.SetFactor(m_factorType, factor);
+		std::vector<const Word*> chunkContext;
+		Word* chunkWord = new Word;
+		chunkWord->SetFactor(m_factorType, factor);
 		chunkContext.push_back(chunkWord);
 		
 		// create context in reverse 'cos we skip words we don't want
 		for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos )
 		{
-			const FactorArrayWrapper &factorArray = contextFactor[currPos];
-			factor = factorArray[m_factorType];
+			const Word &word = *contextFactor[currPos];
+			factor = word[m_factorType];
 			std::string strWord = factor->GetString();
 			bool skip = strWord.find("???") == 0;
 			if (skip)
 				continue;
 
 			// add word to chunked context
-			Word chunkWord;
-			chunkWord.SetFactor(m_factorType, factor);
+			Word* chunkWord = new Word;
+			chunkWord->SetFactor(m_factorType, factor);
 			chunkContext.push_back(chunkWord);
 		}
 	
@@ -112,7 +112,9 @@ public:
 		*/
 		// calc score on chunked phrase
 		float ret = m_lmImpl->GetValue(chunkContext, finalState, len);
-		
+
+		RemoveAllInColl(chunkContext);
+
 		return ret;
 	}
 };
diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp
index 3ef1a464b..517a592c1 100755
--- a/moses/src/LanguageModelIRST.cpp
+++ b/moses/src/LanguageModelIRST.cpp
@@ -128,7 +128,7 @@ int LanguageModelIRST::GetLmID( const std::string &str ) const
     return m_lmtb->dict->encode( str.c_str() );
 }
 
-float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int* len) const
+float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
 {
 	unsigned int dummy;
 	if (!len) { len = &dummy; }
@@ -137,24 +137,24 @@ float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFacto
 	// set up context
 	size_t count = contextFactor.size();
     
-  m_lmtb_ng->size=0;
-  if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
-  if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);  
+	m_lmtb_ng->size=0;
+	if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
+	if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);  
   
 	for (size_t i = 0 ; i < count ; i++)
 	{
 
-    int lmId = GetLmID(contextFactor[i][factorType]);
-    m_lmtb_ng->pushc(lmId);
+		int lmId = GetLmID((*contextFactor[i])[factorType]);
+		m_lmtb_ng->pushc(lmId);
 	}
   
 	if (finalState){        
-    *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);	
+		*finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);	
 		// back off stats not currently available
 		*len = 0;	
 	}
 
-  return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
+	return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
 }
 
 
diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h
index 9c465c41e..d7c04092a 100755
--- a/moses/src/LanguageModelIRST.h
+++ b/moses/src/LanguageModelIRST.h
@@ -67,7 +67,7 @@ public:
 					, float weight
 					, size_t nGramOrder);
 
-  virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
+  virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
 
   const void CleanUpAfterSentenceProcessing();
   const void InitializeBeforeSentenceProcessing();
diff --git a/moses/src/LanguageModelJoint.h b/moses/src/LanguageModelJoint.h
index 59696577d..5030ea5df 100644
--- a/moses/src/LanguageModelJoint.h
+++ b/moses/src/LanguageModelJoint.h
@@ -79,7 +79,7 @@ public:
 		m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder);
 	}
 	
-	float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
+	float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
 	{
 		if (contextFactor.size() == 0)
 		{
@@ -92,29 +92,29 @@ public:
 		*/
 
 		// joint context for internal LM
-		std::vector<FactorArrayWrapper> jointContext;
+		std::vector<const Word*> jointContext;
 		
 		for (size_t currPos = 0 ; currPos < m_nGramOrder ; ++currPos )
 		{
-			const FactorArrayWrapper &factorArray = contextFactor[currPos];
+			const Word &word = *contextFactor[currPos];
 
 			// add word to chunked context
 			std::stringstream stream("");
 
-			const Factor *factor = factorArray[ m_factorTypesOrdered[0] ];
+			const Factor *factor = word[ m_factorTypesOrdered[0] ];
 			stream << factor->GetString();
 
 			for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index)
 			{
 				FactorType factorType = m_factorTypesOrdered[index];
-				const Factor *factor = factorArray[factorType];
+				const Factor *factor = word[factorType];
 				stream << "|" << factor->GetString();
 			}
 			
 			factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str());
 
-			Word jointWord;
-			jointWord.SetFactor(m_implFactor, factor);
+			Word* jointWord = new Word;
+			jointWord->SetFactor(m_implFactor, factor);
 			jointContext.push_back(jointWord);
 		}
 	
@@ -125,6 +125,8 @@ public:
 		*/
 		// calc score on chunked phrase
 		float ret = m_lmImpl->GetValue(jointContext, finalState, len);
+
+		RemoveAllInColl(jointContext);
 		
 		return ret;
 	}
diff --git a/moses/src/LanguageModelMultiFactor.cpp b/moses/src/LanguageModelMultiFactor.cpp
index 8dabb30a5..275e5a0e5 100644
--- a/moses/src/LanguageModelMultiFactor.cpp
+++ b/moses/src/LanguageModelMultiFactor.cpp
@@ -40,10 +40,10 @@ bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
 		return false;
 	
 	// whether phrase contains all factors in this LM
-	const FactorArray &factorArray = phrase.GetFactorArray(0);
+	const Word &word = phrase.GetWord(0);
 	for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor)
 	{
-		if (m_factorTypes[currFactor] && factorArray[currFactor] == NULL)
+		if (m_factorTypes[currFactor] && word[currFactor] == NULL)
 			return false;
 	}
 	return  true;
diff --git a/moses/src/LanguageModelSRI.cpp b/moses/src/LanguageModelSRI.cpp
index be59f420a..2d499b771 100755
--- a/moses/src/LanguageModelSRI.cpp
+++ b/moses/src/LanguageModelSRI.cpp
@@ -132,7 +132,7 @@ float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
 	return FloorSRIScore(TransformSRIScore(p));  // log10->log
 }
 
-float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int *len) const
+float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const
 {
 	FactorType	factorType = GetFactorType();
 	size_t count = contextFactor.size();
@@ -146,13 +146,13 @@ float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor
 	VocabIndex context[MAX_NGRAM_SIZE];
 	for (size_t i = 0 ; i < count - 1 ; i++)
 	{
-		context[i] =  GetLmID(contextFactor[count-2-i][factorType]);
+		context[i] =  GetLmID((*contextFactor[count-2-i])[factorType]);
 	}
 	context[count-1] = Vocab_None;
 	
-	assert(contextFactor[count-1][factorType] != NULL);
+	assert((*contextFactor[count-1])[factorType] != NULL);
 	// call sri lm fn
-	VocabIndex lmId= GetLmID(contextFactor[count-1][factorType]);
+	VocabIndex lmId= GetLmID((*contextFactor[count-1])[factorType]);
 	float ret = GetValue(lmId, context);
 
 	if (finalState) {
diff --git a/moses/src/LanguageModelSRI.h b/moses/src/LanguageModelSRI.h
index b43e4bc1f..b1a59f899 100755
--- a/moses/src/LanguageModelSRI.h
+++ b/moses/src/LanguageModelSRI.h
@@ -56,6 +56,6 @@ public:
 					, float weight
 					, size_t nGramOrder);
 
-  virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
+  virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
 };
 
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 48bdb0d80..013dbdc34 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -9,7 +9,6 @@ libmoses_a_SOURCES_TMP = \
 	DistortionOrientation.cpp \
 	DummyScoreProducers.cpp \
 	Factor.cpp \
-	FactorArrayWrapper.cpp \
 	FactorCollection.cpp \
 	FactorTypeSet.cpp \
 	GenerationDictionary.cpp \
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h
index cf27b687a..03bc2cafd 100644
--- a/moses/src/PDTAimp.h
+++ b/moses/src/PDTAimp.h
@@ -96,7 +96,7 @@ public:
 
 	}
 
-	void Factors2String(FactorArray const& w,std::string& s) const 
+	void Factors2String(Word const& w,std::string& s) const 
 	{
 		for(size_t j=0;j<m_input.size();++j)
 			{
@@ -155,7 +155,7 @@ public:
 		std::vector<std::string> srcString(src.GetSize());
 		// convert source Phrase into vector of strings
 		for(size_t i=0;i<srcString.size();++i)
-			Factors2String(src.GetFactorArray(i),srcString[i]);
+			Factors2String(src.GetWord(i),srcString[i]);
 
 		// get target phrases in string representation
 		std::vector<StringTgtCand> cands;
@@ -268,9 +268,9 @@ public:
 		for(size_t k=0;k<factorStrings.size();++k) 
 			{
 				std::vector<std::string> factors=Tokenize(*factorStrings[k],"|");
-				FactorArray& fa=targetPhrase.AddWord();
+				Word& w=targetPhrase.AddWord();
 				for(size_t l=0;l<m_output.size();++l)
-					fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
+					w[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
 			}
 		targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
 		targetPhrase.SetSourcePhrase(srcPtr);
@@ -360,7 +360,7 @@ public:
 					{
 						const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
 						std::string s;
-						Factors2String(w.GetFactorArray(),s);
+						Factors2String(w,s);
 						bool isEpsilon=(s=="" || s==EPSILON);
 
 						// do not start with epsilon (except at first position)
diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp
index 5d0c3ee07..fdd54d61d 100755
--- a/moses/src/Phrase.cpp
+++ b/moses/src/Phrase.cpp
@@ -1,4 +1,5 @@
 // $Id$
+// vim:tabstop=2
 
 /***********************************************************************
 Moses - factored phrase-based language decoder
@@ -27,20 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "FactorCollection.h"
 #include "Phrase.h"
 #include "Util.h" //malloc() replacement
+#include "StaticData.h"  // GetMaxNumFactors
 
 using namespace std;
 
-std::vector<mempool*> Phrase::s_memPool;
+// std::vector<mempool*> Phrase::s_memPool;
 
 Phrase::Phrase(const Phrase &copy)
 :m_direction(copy.m_direction)
 ,m_phraseSize(copy.m_phraseSize)
 ,m_arraySize(copy.m_arraySize)
-,m_memPoolIndex(copy.m_memPoolIndex)
+//,m_memPoolIndex(copy.m_memPoolIndex)
+,m_words(copy.m_words)
 {
-	assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
-	m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
-	memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
 }
 
 Phrase& Phrase::operator=(const Phrase& x) 
@@ -48,19 +48,12 @@ Phrase& Phrase::operator=(const Phrase& x)
 	if(this!=&x)
 		{
 
-			if(m_factorArray)
-				{
-					assert(m_memPoolIndex<s_memPool.size());
-					s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
-				}
-
 			m_direction=x.m_direction;
 			m_phraseSize=x.m_phraseSize;
 			m_arraySize=x.m_arraySize;
-			m_memPoolIndex=x.m_memPoolIndex;
+//			m_memPoolIndex=x.m_memPoolIndex;
 
-			m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
-			memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));
+			m_words = x.m_words;
 		}
 	return *this;
 }
@@ -70,55 +63,34 @@ Phrase::Phrase(FactorDirection direction)
 	: m_direction(direction)
 	, m_phraseSize(0)
 	, m_arraySize(ARRAY_SIZE_INCR)
-	, m_memPoolIndex(0)
+//	, m_memPoolIndex(0)
+	, m_words(ARRAY_SIZE_INCR)
 {
-	assert(m_memPoolIndex<s_memPool.size());
-	m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
 }
 
 Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
 :m_direction(direction)
 ,m_phraseSize(mergeWords.size())
+,m_words(mergeWords.size())
 {
-	m_memPoolIndex	= (m_phraseSize + ARRAY_SIZE_INCR - 1) / ARRAY_SIZE_INCR  - 1;
-	m_arraySize 		= (m_memPoolIndex + 1) * ARRAY_SIZE_INCR;
-	m_factorArray 	= (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
-	
 	for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
 	{
-		FactorArray &thisWord				= m_factorArray[currPos];
-		const Word &mergeWord				= *mergeWords[currPos];
-
-		for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
-		{
-			FactorType factorType = static_cast<FactorType>(currFactor);
-			thisWord[currFactor] = mergeWord.GetFactor(factorType);
-		}
+		m_words[currPos] = *mergeWords[currPos];
 	}
 }
 
 Phrase::~Phrase()
 {
-	// RZ: 
-	// will segFault if Phrase was default constructed and AddWord was never called
-	// TODO not sure if this is really the intended behaviour 
-	// assertion failure is better than segFault, but if(m_factorArray) might be more appropriate
-	//assert(m_factorArray); 
-	if(m_factorArray)
-		{
-			assert(m_memPoolIndex<s_memPool.size());
-			assert((char*)m_factorArray);
-			s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
-		}
 }
 
 void Phrase::MergeFactors(const Phrase &copy)
 {
 	assert(GetSize() == copy.GetSize());
 	size_t size = GetSize();
+	const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
 	for (size_t currPos = 0 ; currPos < size ; currPos++)
 	{
-		for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+		for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
 		{
 			FactorType factorType = static_cast<FactorType>(currFactor);
 			const Factor *factor = copy.GetFactor(currPos, factorType);
@@ -153,8 +125,8 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
 
 	for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++)
 	{
-		FactorArray &newWord = retPhrase.AddWord();
-		Word::Copy(newWord, GetFactorArray(currPos));
+		Word &word = retPhrase.AddWord();
+		word = GetWord(currPos);
 	}
 
 	return retPhrase;
@@ -166,31 +138,21 @@ std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
 	stringstream strme;
 	for (size_t pos = 0 ; pos < GetSize() ; pos++)
 	{
-		strme << Word::ToString(factorsToPrint, GetFactorArray(pos));
+		strme << GetWord(pos).ToString(factorsToPrint);
 	}
 
 	return strme.str();
 }
 
-FactorArray &Phrase::AddWord()
+Word &Phrase::AddWord()
 {
 	if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)
 	{ // need to expand array
-		FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->allocate();
-		memcpy(newArray, m_factorArray, m_phraseSize * sizeof(FactorArray));
-		s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
-		
-		m_memPoolIndex++;
 		m_arraySize += ARRAY_SIZE_INCR;
-		m_factorArray = newArray;
+		m_words.resize(m_arraySize);
 	}
 
-	FactorArray &factorArray = m_factorArray[m_phraseSize];
-	Word::Initialize(factorArray);
-
-	m_phraseSize++;
-
-	return factorArray;
+	return m_words[m_phraseSize++];
 }
 
 vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter)
@@ -233,13 +195,13 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
 	for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
 	{
 		// add word this phrase
-		FactorArray &factorArray = AddWord();
+		Word &word = AddWord();
 		for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++)
 		{
 			FactorType factorType = factorOrder[currFactorIndex];
 			const string &factorStr = phraseVector[phrasePos][currFactorIndex];
 			const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr); 
-			factorArray[factorType] = factor;
+			word[factorType] = factor;
 		}
 	}
 }
@@ -270,8 +232,9 @@ bool Phrase::operator < (const Phrase &compare) const
 	{
 		size_t minSize = std::min( thisSize , compareSize );
 
+		const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
 		// taken from word.Compare()
-		for (size_t i = 0 ; i < MAX_NUM_FACTORS ; i++)
+		for (size_t i = 0 ; i < maxNumFactors ; i++)
 		{
 			FactorType factorType = static_cast<FactorType>(i);
 
@@ -346,9 +309,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
 
 	const size_t size = GetSize();
 
+	const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
 	for (size_t currPos = 0 ; currPos < size ; currPos++)
 	{
-		for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+		for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
 		{
 			FactorType factorType = static_cast<FactorType>(currFactor);
 			const Factor *thisFactor 		= GetFactor(currPos, factorType)
@@ -389,6 +353,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
 
 void Phrase::InitializeMemPool()
 {
+#if 0
 	s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
 	s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
 	s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
@@ -399,15 +364,18 @@ void Phrase::InitializeMemPool()
 	
 	for (size_t i = 8 ; i < 30 ; ++i)
 		s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
+#endif
 }
 
 void Phrase::FinalizeMemPool()
 {
+#if 0
 	std::vector<mempool*>::iterator iter;
 	for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
 	{
 		delete *iter;
 	}
+#endif
 }
 
 TO_STRING_BODY(Phrase);
@@ -418,8 +386,8 @@ ostream& operator<<(ostream& out, const Phrase& phrase)
 //	out << "(size " << phrase.GetSize() << ") ";
 	for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++)
 	{
-		const FactorArray &factorArray = phrase.GetFactorArray(pos);
-		out << Word::ToString(factorArray);
+		const Word &word = phrase.GetWord(pos);
+		out << word;
 	}
 	return out;
 }
diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h
index 321741615..4d0986098 100755
--- a/moses/src/Phrase.h
+++ b/moses/src/Phrase.h
@@ -1,4 +1,5 @@
 // $Id$
+// vim:tabstop=2
 
 /***********************************************************************
 Moses - factored phrase-based language decoder
@@ -35,13 +36,13 @@ class Phrase
 {
 	friend std::ostream& operator<<(std::ostream&, const Phrase&);
  private:
-	static std::vector<mempool*> s_memPool;
+//	static std::vector<mempool*> s_memPool;
 
 	FactorDirection				m_direction;
-	size_t								m_phraseSize, //number of words
-												m_arraySize,
-												m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
-	FactorArray						*m_factorArray;
+	size_t								m_phraseSize; //number of words
+	size_t								m_arraySize;
+//	size_t										m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
+	std::vector<Word>			m_words;
 
 public:
 	static void InitializeMemPool();
@@ -84,35 +85,37 @@ public:
 	{
 		return m_phraseSize;
 	}
-	inline const FactorArray &GetFactorArray(size_t pos) const
+	inline const Word &GetWord(size_t pos) const
 	{
-		return m_factorArray[pos];
+		return m_words[pos];
 	}
-	inline FactorArray &GetFactorArray(size_t pos)
+	inline Word &GetWord(size_t pos)
 	{
-		return m_factorArray[pos];
+		return m_words[pos];
 	}
 	inline const Factor *GetFactor(size_t pos, FactorType factorType) const
 	{
-		FactorArray &ptr = m_factorArray[pos];
+		const Word &ptr = m_words[pos];
 		return ptr[factorType];
 	}
 	inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor)
 	{
-		FactorArray &ptr = m_factorArray[pos];
+		Word &ptr = m_words[pos];
 		ptr[factorType] = factor;
 	}
 
 	bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
 							, const std::vector<FactorType> &inputFactor) const;
 
-	FactorArray &AddWord();
+	Word &AddWord();
 
 	Phrase GetSubString(const WordsRange &wordsRange) const;
 	
 	std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; 
   
-	void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());}
+	void push_back(Word const& w) {
+    AddWord() = w;
+  }
 
 	TO_STRING;
 
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index c55aafac6..3f4cfbf28 100755
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -122,7 +122,7 @@ TargetPhraseCollection *PhraseDictionary::CreateTargetPhraseCollection(const Phr
 	PhraseDictionaryNode *currNode = &m_collection;
 	for (size_t pos = 0 ; pos < size ; ++pos)
 	{
-		Word word(source.GetFactorArray(pos));
+		const Word& word = source.GetWord(pos);
 		currNode = currNode->GetOrCreateChild(word);
 		if (currNode == NULL)
 			return NULL;
@@ -144,7 +144,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
 	const PhraseDictionaryNode *currNode = &m_collection;
 	for (size_t pos = 0 ; pos < size ; ++pos)
 	{
-		Word word(source.GetFactorArray(pos));
+		const Word& word = source.GetWord(pos);
 		currNode = currNode->GetChild(word);
 		if (currNode == NULL)
 			return NULL;
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index 38af542d7..b33d700cb 100755
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -50,9 +50,9 @@ class Sentence : public Phrase, public InputType
 		{
 			return Phrase::GetStringRep(factorsToPrint);
 		}
-	const FactorArray& GetFactorArray(size_t pos) const
+	const Word& GetWord(size_t pos) const
 		{
-			return Phrase::GetFactorArray(pos);
+			return Phrase::GetWord(pos);
 		}
 	size_t GetSize() const 
 		{
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 9edbab89f..813c6aef2 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1,4 +1,5 @@
 // $Id$
+// vim:tabstop=2
 
 /***********************************************************************
 Moses - factored phrase-based language decoder
@@ -42,6 +43,22 @@ using namespace std;
 
 extern Timer timer;
 
+static size_t CalcMax(size_t x, const vector<size_t>& y) {
+  size_t max = x;
+  for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+    if (*i > max) max = *i;
+  return max;
+}
+
+static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z) {
+  size_t max = x;
+  for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+    if (*i > max) max = *i;
+  for (vector<size_t>::const_iterator i=z.begin(); i != z.end(); ++i)
+    if (*i > max) max = *i;
+  return max;
+}
+
 StaticData* StaticData::s_instance(0);
 
 StaticData::StaticData()
@@ -57,6 +74,9 @@ StaticData::StaticData()
 ,m_computeLMBackoffStats(false)
 ,m_factorDelimiter("|") // default delimiter between factors
 {
+  m_maxFactorIdx[0] = 0;  // source side
+  m_maxFactorIdx[1] = 0;  // target side
+
 	s_instance = this;
 
 	// memory pools
@@ -300,9 +320,6 @@ bool StaticData::LoadParameters(int argc, char* argv[])
 		
 
 	  // initialize n-gram order for each factor. populated only by factored lm
-	  for(size_t i=0; i < MAX_NUM_FACTORS ; i++)
-	  	m_maxNgramOrderForFactor[i] = 0;
-	  
 		const vector<string> &lmVector = m_parameter.GetParam("lmodel-file");
 
 		for(size_t i=0; i<lmVector.size(); i++) 
@@ -360,6 +377,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
 			bool oldFormat = (token.size() == 3);
 			vector<FactorType> 	input		= Tokenize<FactorType>(token[0], ",")
 													,output	= Tokenize<FactorType>(token[1], ",");
+      m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output);
 			string							filePath;
 			size_t							numFeatures = 1;
 			if (oldFormat)
@@ -536,6 +554,9 @@ void StaticData::LoadPhraseTables(bool filter
 			//characteristics of the phrase table
 			vector<FactorType> 	input		= Tokenize<FactorType>(token[0], ",")
 													,output	= Tokenize<FactorType>(token[1], ",");
+			m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input);
+			m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output);
+      m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1;
 			string							filePath= token[3];
 			size_t							noScoreComponent	= Scan<size_t>(token[2]);
 			// weights for this phrase dictionary
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index bcd59ca11..38cf1a525 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -80,7 +80,6 @@ protected:
 	std::vector<std::string>		m_mySQLParam;
 	InputOutput									*m_inputOutput;
 	bool                        m_fLMsLoaded, m_labeledNBestList;
-	size_t											m_maxNgramOrderForFactor[MAX_NUM_FACTORS];
 	/***
 	 * false = treat unknown words as unknowns, and translate them as themselves;
 	 * true = drop (ignore) them
@@ -88,6 +87,7 @@ protected:
 	bool m_dropUnknown;
 	bool m_wordDeletionEnabled;
 
+
 	int m_inputType;
 	unsigned m_numInputScores;
 
@@ -102,7 +102,9 @@ protected:
 	bool m_computeLMBackoffStats;
 
 	mutable std::auto_ptr<SentenceStats> m_sentenceStats;
-	std::string m_factorDelimiter;
+	std::string m_factorDelimiter; //! by default, |, but it can be changed
+	size_t m_maxFactorIdx[2];  //! highest factor index seen on the source [0] and target [1] side; the factor count is this value + 1
+	size_t m_maxNumFactors;  //! max number of factors on both source and target sides
 
 public:
 	StaticData();
@@ -299,4 +301,6 @@ public:
 	bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
 	bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;}
 	const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
+	size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
+	size_t GetMaxNumFactors() const { return m_maxNumFactors; }
 };
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
index 06fadff9b..ff1e2604d 100644
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@@ -114,9 +114,9 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
 	const size_t len = GetSize();
 	for (size_t currPos = 0 ; currPos < len ; currPos++)
 	{
-		const FactorArray &inputWord	= inputPhrase.GetFactorArray(currPos);
-		FactorArray &cloneWord = clone->GetFactorArray(currPos);
-		Word::Merge(cloneWord, inputWord);
+		const Word &inputWord	= inputPhrase.GetWord(currPos);
+		Word &cloneWord = clone->GetWord(currPos);
+		cloneWord.Merge(inputWord);
 		
 		currWord++;
 	}
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 64c971f5f..8a1bd9508 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -332,7 +332,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
 
 
 /** special handling of unknown words: add special translation (or drop) */
-void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourceWord,
+void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
 																														size_t sourcePos
 																												, FactorCollection &factorCollection)
 {
@@ -356,7 +356,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
 		{
 			// add to dictionary
 			TargetPhrase targetPhrase(Output);
-			FactorArray &targetWord = targetPhrase.AddWord();
+			Word &targetWord = targetPhrase.AddWord();
 						
 			for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
 			{
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index d8f9ef4db..141e8d6cf 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -36,6 +36,7 @@ class GenerationDictionary;
 class InputType;
 class LMList;
 class FactorMask;
+class Word;
 
 typedef std::vector<const TranslationOption*> TranslationOptionList;
 
@@ -70,7 +71,7 @@ protected:
 															, size_t startPos, size_t endPos, bool observeTableLimit );
 
 	void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection);
-	virtual void ProcessOneUnknownWord(const FactorArray &sourceWord
+	virtual void ProcessOneUnknownWord(const Word &sourceWord
 																		 , size_t sourcePos
 																		 , FactorCollection &factorCollection);
 
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp
index d1e506935..035212d84 100644
--- a/moses/src/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/src/TranslationOptionCollectionConfusionNet.cpp
@@ -19,7 +19,7 @@ ProcessUnknownWord(		size_t sourcePos
 
 	ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
 	for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i)
-		ProcessOneUnknownWord(i->first.GetFactorArray(),sourcePos,factorCollection);
+		ProcessOneUnknownWord(i->first,sourcePos,factorCollection);
 		
 }
 
diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp
index b456239a2..037c451e2 100644
--- a/moses/src/TranslationOptionCollectionText.cpp
+++ b/moses/src/TranslationOptionCollectionText.cpp
@@ -36,6 +36,6 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
 void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos
 												, FactorCollection &factorCollection)
 {
-	const FactorArray &sourceWord = m_source.GetFactorArray(sourcePos);
+	const Word &sourceWord = m_source.GetWord(sourcePos);
 	ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection);
 }
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index a896ded97..b67724369 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -81,8 +81,8 @@ const size_t MAX_NUM_FACTORS = 4;
 
 enum FactorDirection
 {	
-	Input,
-	Output
+	Input,			//! Source factors
+	Output			//! Target factors
 };
 
 enum DecodeType
@@ -144,5 +144,3 @@ enum DictionaryFind
 // typedef
 typedef size_t FactorType;
 
-class Factor;
-typedef const Factor * FactorArray[MAX_NUM_FACTORS];
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
index d28542fe7..5c8384316 100755
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@@ -1,4 +1,5 @@
 // $Id$
+// vim:tabstop=2
 
 /***********************************************************************
 Moses - factored phrase-based language decoder
@@ -28,25 +29,23 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 using namespace std;
 
 Word::Word(const Word &copy)
-:FactorArrayWrapper()
+#ifdef DYNAMIC_FACTOR_ARRAY
+: m_factorArray(copy.m_factorArray)
+#endif
 { // deep copy
-	m_factorArrayPtr = &m_factorArray;
-	Word::Copy(m_factorArray, copy.m_factorArray);
+#ifndef DYNAMIC_FACTOR_ARRAY
+	memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
+#endif
 }
 
 Word::Word()
+#ifdef DYNAMIC_FACTOR_ARRAY
+: m_factorArray(MAX_NUM_FACTORS, 0)
+#endif
 {
-	m_factorArrayPtr = &m_factorArray;
-	Word::Initialize(m_factorArray);
-}
-
-Word::Word(const FactorArray &factorArray)
-{
-	m_factorArrayPtr = &m_factorArray;
-	for (size_t factor = 0 ; factor < MAX_NUM_FACTORS ; factor++)
-	{
-		m_factorArray[factor] = factorArray[factor];
-	}
+#ifndef DYNAMIC_FACTOR_ARRAY
+	memset(m_factorArray, 0, sizeof(FactorArray));
+#endif
 }
 
 Word::~Word()
@@ -54,7 +53,7 @@ Word::~Word()
 }
 
 // static
-int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
+int Word::Compare(const Word &targetWord, const Word &sourceWord)
 {
 	for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
 	{
@@ -73,51 +72,20 @@ int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
 
 }
 
-void Word::Copy(FactorArray &target, const FactorArray &source)
-{
-	memcpy(target, source, sizeof(FactorArray));
-}
-
-void Word::Initialize(FactorArray &factorArray)
-{
-	memset(factorArray, 0, sizeof(FactorArray));
-}
-
-void Word::Merge(FactorArray &targetWord, const FactorArray &sourceWord)
+void Word::Merge(const Word &sourceWord)
 {
 	for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
 	{
-		const Factor *sourcefactor		= sourceWord[currFactor]
-								,*targetFactor			= targetWord[currFactor];
+		const Factor *sourcefactor		= sourceWord.m_factorArray[currFactor]
+								,*targetFactor		= this     ->m_factorArray[currFactor];
 		if (targetFactor == NULL && sourcefactor != NULL)
 		{
-			targetWord[currFactor] = sourcefactor;
-		}
-	}
-}
-
-std::string Word::ToString(const FactorArray &factorArray)
-{
-	stringstream strme;
-
-	const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
-	bool firstPass = true;
-	// TODO- don't loop over MAX_NUM_FACTORS here, just use the ones that
-	// actually participate in the xltn process.
-	for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
-	{
-		const Factor *factor = factorArray[currFactor];
-		if (factor != NULL)
-		{
-			if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
-			strme << *factor;
+			m_factorArray[currFactor] = sourcefactor;
 		}
 	}
-	strme << " ";
-	return strme.str();
 }
 
-std::string Word::ToString(const vector<FactorType> factorType, const FactorArray &factorArray)
+std::string Word::ToString(const vector<FactorType> factorType) const
 {
 	stringstream strme;
 	assert(factorType.size() <= MAX_NUM_FACTORS);
@@ -125,7 +93,7 @@ std::string Word::ToString(const vector<FactorType> factorType, const FactorArra
 	bool firstPass = true;
 	for (unsigned int i = 0 ; i < factorType.size() ; i++)
 	{
-		const Factor *factor = factorArray[factorType[i]];
+		const Factor *factor = m_factorArray[factorType[i]];
 		if (factor != NULL)
 		{
 			if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
diff --git a/moses/src/Word.h b/moses/src/Word.h
index 0bfb0c106..5cbf2bbcf 100755
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@@ -27,59 +27,68 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "TypeDef.h"
 #include "Factor.h"
 #include "Util.h"
-#include "FactorArrayWrapper.h"
+
+#undef DYNAMIC_FACTOR_ARRAY
 
 class Phrase;
 
 /***
  * hold a set of factors for a single word
- * 
- * TODO either replace all uses of FactorArray with Word or vice versa; don't only use the wrapper in half of cases!
  */
-class Word : public FactorArrayWrapper
+class Word
 {
 	friend std::ostream& operator<<(std::ostream&, const Word&);
 
 protected:
-	FactorArray					m_factorArray;
+
+#ifndef DYNAMIC_FACTOR_ARRAY
+	typedef const Factor * FactorArray[MAX_NUM_FACTORS];
+#else
+	typedef std::vector<const Factor*> FactorArray;
+#endif
+
+	FactorArray m_factorArray;
 
 public:
 	/**
 	 * deep copy
 	 */
 	Word(const Word &copy);
-	Word(const FactorArray &factorArray);
 	Word();
 
 	~Word();
+	const Factor*& operator[](FactorType index) {
+		return m_factorArray[index];
+	}
 
-	// why is this needed ? it should be inherited
-	const FactorArray &GetFactorArray() const
-	{
-		return m_factorArray;
+	const Factor * const & operator[](FactorType index) const {
+		return m_factorArray[index];
 	}
-	inline FactorArray &GetFactorArray()
-	{
-		return m_factorArray;
+	inline const Factor* GetFactor(FactorType factorType) const {
+		return m_factorArray[factorType];
 	}
 	inline void SetFactor(FactorType factorType, const Factor *factor)
 	{
 		m_factorArray[factorType] = factor;
 	}
 
+	void Merge(const Word &sourceWord);
+
+	std::string ToString(const std::vector<FactorType> factorType) const;
 	TO_STRING;
 
 	/* static functions */
 	
-	// FactorArray
-	static void Copy(FactorArray &target, const FactorArray &source);
-	static void Initialize(FactorArray &factorArray);
-
 	/***
 	 * wherever the source word has a given factor that the target word is missing, add it to the target word
 	 */
-	static void Merge(FactorArray &targetWord, const FactorArray &sourceWord);
-	static std::string ToString(const FactorArray &factorArray);
-	static std::string ToString(const std::vector<FactorType> factorType, const FactorArray &factorArray);
-	static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
+	static int Compare(const Word &targetWord, const Word &sourceWord);
+
+        inline bool operator< (const Word &compare) const
+        { // needed to store word in GenerationDictionary map
+                // uses comparison of FactorKey
+                // 'proper' comparison, not address/id comparison
+                return Compare(*this, compare) < 0;
+        }
+
 };
author	redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230>	2006-09-26 01:35:10 +0400
committer	redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230>	2006-09-26 01:35:10 +0400
commit	441b147b3531bbefb53d9290dbf7595206e9a423 (patch)
tree	fe727a99134e7008e92ce90c9489cc6c1076226a /moses
parent	27fe63219404d339ebd406da2cbee1703e84ae8e (diff)