Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-09-26 01:35:10 +0400
committer redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-09-26 01:35:10 +0400
commit 441b147b3531bbefb53d9290dbf7595206e9a423 (patch)
tree fe727a99134e7008e92ce90c9489cc6c1076226a /moses
parent 27fe63219404d339ebd406da2cbee1703e84ae8e (diff)
Get rid of FactorArrayWrapper/FactorArray and use only Word. Memory pool is currently disabled, but the net cleanup resulted in better performance despite this (there are fewer copies of FactorArray -> Word now).
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@827 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/ConfusionNet.cpp 4
-rw-r--r-- moses/src/ConfusionNet.h 2
-rw-r--r-- moses/src/DecodeStepGeneration.cpp 4
-rw-r--r-- moses/src/FactorArrayWrapper.cpp 62
-rw-r--r-- moses/src/FactorArrayWrapper.h 82
-rwxr-xr-x moses/src/GenerationDictionary.cpp 13
-rwxr-xr-x moses/src/GenerationDictionary.h 8
-rwxr-xr-x moses/src/Hypothesis.cpp 22
-rwxr-xr-x moses/src/Hypothesis.h 19
-rw-r--r-- moses/src/InputType.h 2
-rwxr-xr-x moses/src/LanguageModel.cpp 13
-rwxr-xr-x moses/src/LanguageModel.h 10
-rw-r--r-- moses/src/LanguageModelChunking.h 22
-rwxr-xr-x moses/src/LanguageModelIRST.cpp 16
-rwxr-xr-x moses/src/LanguageModelIRST.h 2
-rw-r--r-- moses/src/LanguageModelJoint.h 16
-rw-r--r-- moses/src/LanguageModelMultiFactor.cpp 4
-rwxr-xr-x moses/src/LanguageModelSRI.cpp 8
-rwxr-xr-x moses/src/LanguageModelSRI.h 2
-rw-r--r-- moses/src/Makefile.am 1
-rw-r--r-- moses/src/PDTAimp.h 10
-rwxr-xr-x moses/src/Phrase.cpp 94
-rwxr-xr-x moses/src/Phrase.h 29
-rwxr-xr-x moses/src/PhraseDictionary.cpp 4
-rwxr-xr-x moses/src/Sentence.h 4
-rwxr-xr-x moses/src/StaticData.cpp 27
-rwxr-xr-x moses/src/StaticData.h 8
-rw-r--r-- moses/src/TargetPhrase.cpp 6
-rw-r--r-- moses/src/TranslationOptionCollection.cpp 4
-rwxr-xr-x moses/src/TranslationOptionCollection.h 3
-rw-r--r-- moses/src/TranslationOptionCollectionConfusionNet.cpp 2
-rw-r--r-- moses/src/TranslationOptionCollectionText.cpp 2
-rwxr-xr-x moses/src/TypeDef.h 6
-rwxr-xr-x moses/src/Word.cpp 72
-rwxr-xr-x moses/src/Word.h 51
35 files changed, 232 insertions, 402 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
index 0b29cc35d..aa5062a10 100644
--- a/moses/src/ConfusionNet.cpp
+++ b/moses/src/ConfusionNet.cpp
@@ -55,7 +55,7 @@ ConfusionNet::ConfusionNet(Sentence const& s)
{
data.resize(s.GetSize());
for(size_t i=0;i<s.GetSize();++i)
- data[i].push_back(std::make_pair(Word(s.GetFactorArray(i)),0.0));
+ data[i].push_back(std::make_pair(s.GetWord(i),0.0));
}
@@ -185,7 +185,7 @@ std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint)
return "";
}
#pragma warning(disable:4716)
-const FactorArray& ConfusionNet::GetFactorArray(size_t) const {
+const Word& ConfusionNet::GetWord(size_t) const {
std::cerr<<"ERROR: call to ConfusionNet::GetFactorArray\n";
abort();
}
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
index 2e5aaa673..03101e829 100644
--- a/moses/src/ConfusionNet.h
+++ b/moses/src/ConfusionNet.h
@@ -41,7 +41,7 @@ class ConfusionNet : public InputType {
Phrase GetSubString(const WordsRange&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
- const FactorArray& GetFactorArray(size_t pos) const;
+ const Word& GetWord(size_t pos) const;
TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const;
diff --git a/moses/src/DecodeStepGeneration.cpp b/moses/src/DecodeStepGeneration.cpp
index 0f9bb5f4d..998e6dc98 100644
--- a/moses/src/DecodeStepGeneration.cpp
+++ b/moses/src/DecodeStepGeneration.cpp
@@ -107,10 +107,10 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
{
// generatable factors for this word to be put in wordList
WordList &wordList = wordListVector[wordListVectorPos];
- const FactorArray &factorArray = targetPhrase.GetFactorArray(currPos);
+ const Word &word = targetPhrase.GetWord(currPos);
// consult dictionary for possible generations for this word
- const OutputWordCollection *wordColl = generationDictionary.FindWord(factorArray);
+ const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
if (wordColl == NULL)
{ // word not found in generation dictionary
diff --git a/moses/src/FactorArrayWrapper.cpp b/moses/src/FactorArrayWrapper.cpp
deleted file mode 100644
index f205a4a2c..000000000
--- a/moses/src/FactorArrayWrapper.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include "FactorArrayWrapper.h"
-#include "Util.h"
-#include "Word.h"
-
-using namespace std;
-
-FactorArrayWrapper::~FactorArrayWrapper() {}
-
-int FactorArrayWrapper::Compare(const FactorArrayWrapper &compare) const
-{
- return Compare(GetFactorArray(), compare.GetFactorArray());
-}
-
-// static functions
-int FactorArrayWrapper::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
-{
- for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
- {
- const Factor *targetFactor = targetWord[factorType]
- ,*sourceFactor = sourceWord[factorType];
-
- if (targetFactor == NULL || sourceFactor == NULL)
- {
- continue;
- }
- int result = targetFactor->Compare(*sourceFactor);
- if ( result )
- return result;
- }
- return 0;
-
-}
-
-TO_STRING_BODY(FactorArrayWrapper);
-
-// friend
-ostream& operator<<(ostream& out, const FactorArrayWrapper& wrapper)
-{
- out << Word::ToString(*wrapper.m_factorArrayPtr);
- return out;
-}
diff --git a/moses/src/FactorArrayWrapper.h b/moses/src/FactorArrayWrapper.h
deleted file mode 100644
index 740dfbdeb..000000000
--- a/moses/src/FactorArrayWrapper.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// $Id$
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include "TypeDef.h"
-#include "Factor.h"
-
-class FactorArrayWrapper
-{
- friend std::ostream& operator<<(std::ostream&, const FactorArrayWrapper&);
-
-protected:
- const FactorArray *m_factorArrayPtr;
-public:
- FactorArrayWrapper() {}
- FactorArrayWrapper(const FactorArray &factorArray)
- :m_factorArrayPtr(&factorArray) {}
- virtual ~FactorArrayWrapper();
-
- FactorArrayWrapper& operator=(const FactorArrayWrapper &other)
- {
- if(this != &other)
- {
- m_factorArrayPtr = other.m_factorArrayPtr;
- }
- return *this;
- }
-
- const Factor *operator[](size_t index) const
- {
- return (*m_factorArrayPtr)[index];
- }
-
- virtual const FactorArray &GetFactorArray() const
- {
- return *m_factorArrayPtr;
- }
-
- inline const Factor *GetFactor(FactorType factorType) const
- {
- return (*m_factorArrayPtr)[factorType];
- }
-
- int Compare(const FactorArrayWrapper &compare) const;
- // -1 = less than
- // +1 = more than
- // 0 = same
-
- inline bool operator< (const FactorArrayWrapper &compare) const
- { // needed to store word in GenerationDictionary map
- // uses comparison of FactorKey
- // 'proper' comparison, not address/id comparison
- return Compare(compare) < 0;
- }
-
- TO_STRING;
-
- //statics
- static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
-
-};
-
diff --git a/moses/src/GenerationDictionary.cpp b/moses/src/GenerationDictionary.cpp
index f5fc85691..4e91fdbfd 100755
--- a/moses/src/GenerationDictionary.cpp
+++ b/moses/src/GenerationDictionary.cpp
@@ -62,7 +62,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
exit(1);
}
- m_filename = filePath;
+ m_filename = filePath;
string line;
size_t lineNum = 0;
while(getline(inFile, line))
@@ -71,7 +71,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
vector<string> token = Tokenize( line );
// add each line in generation file into class
- Word *inputWord = new Word();
+ Word *inputWord = new Word(); // deleted in destructor
Word outputWord;
// create word with certain factors filled out
@@ -113,7 +113,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
GenerationDictionary::~GenerationDictionary()
{
- std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator iter;
+ std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator iter;
for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter)
{
delete iter->first;
@@ -130,13 +130,12 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const
return "Generation score, file=" + m_filename;
}
-const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
+const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
{
const OutputWordCollection *ret;
- FactorArrayWrapper wrapper(factorArray);
- std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator
- iter = m_collection.find(&wrapper);
+ std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator
+ iter = m_collection.find(&word);
if (iter == m_collection.end())
{ // can't find source phrase
ret = NULL;
diff --git a/moses/src/GenerationDictionary.h b/moses/src/GenerationDictionary.h
index 9248fdd3c..d97f5d679 100755
--- a/moses/src/GenerationDictionary.h
+++ b/moses/src/GenerationDictionary.h
@@ -31,10 +31,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
class FactorCollection;
-struct FactorArrayWrapperComparer
+struct WordComparer
{
//! returns true if hypoA can be recombined with hypoB
- bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const
+ bool operator()(const Word *a, const Word *b) const
{
return *a < *b;
}
@@ -47,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
class GenerationDictionary : public Dictionary, public ScoreProducer
{
protected:
- std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection;
+ std::map<const Word* , OutputWordCollection, WordComparer> m_collection;
// 1st = source
// 2nd = target
std::string m_filename;
@@ -75,6 +75,6 @@ public:
{
return m_collection.size();
}
- const OutputWordCollection *FindWord(const FactorArray &factorArray) const;
+ const OutputWordCollection *FindWord(const Word &word) const;
};
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index c1133dd4c..0365fa61d 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -214,14 +214,14 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
(*_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetWordsCount(), 0);
// 1st n-gram
- vector<FactorArrayWrapper> contextFactor(nGramOrder);
+ vector<const Word*> contextFactor(nGramOrder);
size_t index = 0;
for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++)
{
if (currPos >= 0)
- contextFactor[index++] = GetFactorArray(currPos);
+ contextFactor[index++] = &GetWord(currPos);
else
- contextFactor[index++] = languageModel.GetSentenceStartArray();
+ contextFactor[index++] = &languageModel.GetSentenceStartArray();
}
lmScore = languageModel.GetValue(contextFactor);
if (_lmstats) { languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]); }
@@ -237,7 +237,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
contextFactor[i] = contextFactor[i + 1];
// add last factor
- contextFactor.back() = GetFactorArray(currPos);
+ contextFactor.back() = &GetWord(currPos);
lmScore += languageModel.GetValue(contextFactor);
if (_lmstats)
@@ -249,15 +249,15 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
if (m_sourceCompleted.IsComplete())
{
const size_t size = GetSize();
- contextFactor.back() = languageModel.GetSentenceEndArray();
+ contextFactor.back() = &languageModel.GetSentenceEndArray();
for (size_t i = 0 ; i < nGramOrder - 1 ; i ++)
{
int currPos = size - nGramOrder + i + 1;
if (currPos < 0)
- contextFactor[i] = languageModel.GetSentenceStartArray();
+ contextFactor[i] = &languageModel.GetSentenceStartArray();
else
- contextFactor[i] = GetFactorArray((size_t)currPos);
+ contextFactor[i] = &GetWord((size_t)currPos);
}
if (_lmstats) {
(*_lmstats)[lmIdx].resize((*_lmstats)[lmIdx].size() + 1); // extra space for the last call
@@ -268,7 +268,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
for (size_t i = 0 ; i < nGramOrder - 1 ; i++)
contextFactor[i] = contextFactor[i + 1];
- contextFactor.back() = GetFactorArray(currPos);
+ contextFactor.back() = &GetWord(currPos);
if (_lmstats)
languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]);
}
@@ -437,7 +437,8 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor
std::string Hypothesis::GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors;
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input);
+ for(size_t i=0; i < maxSourceFactors; i++)
{
allFactors.push_back(i);
}
@@ -446,7 +447,8 @@ std::string Hypothesis::GetSourcePhraseStringRep() const
std::string Hypothesis::GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors;
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ const size_t maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output);
+ for(size_t i=0; i < maxTargetFactors; i++)
{
allFactors.push_back(i);
}
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 26694ad4d..56bc1a0fe 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -1,4 +1,5 @@
// $Id$
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -167,19 +168,19 @@ public:
std::string GetSourcePhraseStringRep() const;
std::string GetTargetPhraseStringRep() const;
- // curr - pos is relative from CURRENT hypothesis's starting ind ex
- // (ie, start of sentence would be some negative number, which is
- // not allowed- USE WITH CAUTION)
- inline const FactorArray &GetCurrFactorArray(size_t pos) const
+ /** curr - pos is relative from CURRENT hypothesis's starting index
+ * (ie, start of sentence would be some negative number, which is
+ * not allowed- USE WITH CAUTION) */
+ inline const Word &GetCurrWord(size_t pos) const
{
- return m_targetPhrase.GetFactorArray(pos);
+ return m_targetPhrase.GetWord(pos);
}
inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const
{
return m_targetPhrase.GetFactor(pos, factorType);
}
- // recursive - pos is relative from start of sentence
- inline const FactorArray &GetFactorArray(size_t pos) const
+ /** recursive - pos is relative from start of sentence */
+ inline const Word &GetWord(size_t pos) const
{
const Hypothesis *hypo = this;
while (pos < hypo->GetCurrTargetWordsRange().GetStartPos())
@@ -187,11 +188,11 @@ public:
hypo = hypo->GetPrevHypo();
assert(hypo != NULL);
}
- return hypo->GetCurrFactorArray(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
+ return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos());
}
inline const Factor* GetFactor(size_t pos, FactorType factorType) const
{
- return GetFactorArray(pos)[factorType];
+ return GetWord(pos)[factorType];
}
/***
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
index 6681caa3e..0e3f4f8a5 100644
--- a/moses/src/InputType.h
+++ b/moses/src/InputType.h
@@ -42,7 +42,7 @@ protected:
virtual Phrase GetSubString(const WordsRange&) const =0;
// virtual std::string GetStringRep(const WordsRange&) const=0;
- virtual const FactorArray& GetFactorArray(size_t pos) const=0;
+ virtual const Word& GetWord(size_t pos) const=0;
TO_STRING;
diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp
index 9398e2513..01336700d 100755
--- a/moses/src/LanguageModel.cpp
+++ b/moses/src/LanguageModel.cpp
@@ -39,9 +39,6 @@ LanguageModel::LanguageModel(bool registerScore)
{
if (registerScore)
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
-
- Word::Initialize(m_sentenceStartArray);
- Word::Initialize(m_sentenceEndArray);
}
LanguageModel::~LanguageModel() {}
@@ -59,19 +56,19 @@ void LanguageModel::CalcScore(const Phrase &phrase
ngramScore = 0;
size_t phraseSize = phrase.GetSize();
- vector<FactorArrayWrapper> contextFactor;
+ vector<const Word*> contextFactor;
contextFactor.reserve(m_nGramOrder);
// start of sentence
for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++)
{
- contextFactor.push_back(phrase.GetFactorArray(currPos));
+ contextFactor.push_back(&phrase.GetWord(currPos));
fullScore += GetValue(contextFactor);
}
if (phraseSize >= m_nGramOrder)
{
- contextFactor.push_back(phrase.GetFactorArray(m_nGramOrder - 1));
+ contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1));
ngramScore = GetValue(contextFactor);
}
@@ -82,14 +79,14 @@ void LanguageModel::CalcScore(const Phrase &phrase
{
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
}
- contextFactor[m_nGramOrder - 1] = phrase.GetFactorArray(currPos);
+ contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos);
float partScore = GetValue(contextFactor);
ngramScore += partScore;
}
fullScore += ngramScore;
}
-LanguageModel::State LanguageModel::GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len) const
+LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const
{
State state;
unsigned int dummy;
diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h
index ad3d48bf4..dd21c133d 100755
--- a/moses/src/LanguageModel.h
+++ b/moses/src/LanguageModel.h
@@ -39,7 +39,7 @@ protected:
float m_weight;
std::string m_filename;
size_t m_nGramOrder;
- FactorArray m_sentenceStartArray, m_sentenceEndArray;
+ Word m_sentenceStartArray, m_sentenceEndArray;
LanguageModel(bool registerScore);
@@ -59,19 +59,19 @@ public:
void CalcScore(const Phrase &phrase
, float &fullScore
, float &ngramScore) const;
- virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
+ virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0;
- State GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len = 0) const;
+ State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const;
size_t GetNGramOrder() const
{
return m_nGramOrder;
}
- const FactorArray &GetSentenceStartArray() const
+ const Word &GetSentenceStartArray() const
{
return m_sentenceStartArray;
}
- const FactorArray &GetSentenceEndArray() const
+ const Word &GetSentenceEndArray() const
{
return m_sentenceEndArray;
}
diff --git a/moses/src/LanguageModelChunking.h b/moses/src/LanguageModelChunking.h
index 3c05f2aa4..fd1c0c224 100644
--- a/moses/src/LanguageModelChunking.h
+++ b/moses/src/LanguageModelChunking.h
@@ -64,7 +64,7 @@ public:
m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder);
}
- float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
+ float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
{
if (contextFactor.size() == 0)
{
@@ -76,30 +76,30 @@ public:
TRACE_ERR(std::endl);
*/
// only process context where last word is a word we want
- const Factor *factor = contextFactor.back()[m_factorType];
+ const Factor *factor = (*contextFactor.back())[m_factorType];
std::string strWord = factor->GetString();
if (strWord.find("???") == 0)
return 0;
// add last word
- std::vector<FactorArrayWrapper> chunkContext;
- Word chunkWord;
- chunkWord.SetFactor(m_factorType, factor);
+ std::vector<const Word*> chunkContext;
+ Word* chunkWord = new Word;
+ chunkWord->SetFactor(m_factorType, factor);
chunkContext.push_back(chunkWord);
// create context in reverse 'cos we skip words we don't want
for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos )
{
- const FactorArrayWrapper &factorArray = contextFactor[currPos];
- factor = factorArray[m_factorType];
+ const Word &word = *contextFactor[currPos];
+ factor = word[m_factorType];
std::string strWord = factor->GetString();
bool skip = strWord.find("???") == 0;
if (skip)
continue;
// add word to chunked context
- Word chunkWord;
- chunkWord.SetFactor(m_factorType, factor);
+ Word* chunkWord = new Word;
+ chunkWord->SetFactor(m_factorType, factor);
chunkContext.push_back(chunkWord);
}
@@ -112,7 +112,9 @@ public:
*/
// calc score on chunked phrase
float ret = m_lmImpl->GetValue(chunkContext, finalState, len);
-
+
+ RemoveAllInColl(chunkContext);
+
return ret;
}
};
diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp
index 3ef1a464b..517a592c1 100755
--- a/moses/src/LanguageModelIRST.cpp
+++ b/moses/src/LanguageModelIRST.cpp
@@ -128,7 +128,7 @@ int LanguageModelIRST::GetLmID( const std::string &str ) const
return m_lmtb->dict->encode( str.c_str() );
}
-float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int* len) const
+float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const
{
unsigned int dummy;
if (!len) { len = &dummy; }
@@ -137,24 +137,24 @@ float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFacto
// set up context
size_t count = contextFactor.size();
- m_lmtb_ng->size=0;
- if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
- if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
+ m_lmtb_ng->size=0;
+ if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
+ if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
for (size_t i = 0 ; i < count ; i++)
{
- int lmId = GetLmID(contextFactor[i][factorType]);
- m_lmtb_ng->pushc(lmId);
+ int lmId = GetLmID((*contextFactor[i])[factorType]);
+ m_lmtb_ng->pushc(lmId);
}
if (finalState){
- *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
+ *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng);
// back off stats not currently available
*len = 0;
}
- return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
+ return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng));
}
diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h
index 9c465c41e..d7c04092a 100755
--- a/moses/src/LanguageModelIRST.h
+++ b/moses/src/LanguageModelIRST.h
@@ -67,7 +67,7 @@ public:
, float weight
, size_t nGramOrder);
- virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
+ virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
const void CleanUpAfterSentenceProcessing();
const void InitializeBeforeSentenceProcessing();
diff --git a/moses/src/LanguageModelJoint.h b/moses/src/LanguageModelJoint.h
index 59696577d..5030ea5df 100644
--- a/moses/src/LanguageModelJoint.h
+++ b/moses/src/LanguageModelJoint.h
@@ -79,7 +79,7 @@ public:
m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder);
}
- float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
+ float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const
{
if (contextFactor.size() == 0)
{
@@ -92,29 +92,29 @@ public:
*/
// joint context for internal LM
- std::vector<FactorArrayWrapper> jointContext;
+ std::vector<const Word*> jointContext;
for (size_t currPos = 0 ; currPos < m_nGramOrder ; ++currPos )
{
- const FactorArrayWrapper &factorArray = contextFactor[currPos];
+ const Word &word = *contextFactor[currPos];
// add word to chunked context
std::stringstream stream("");
- const Factor *factor = factorArray[ m_factorTypesOrdered[0] ];
+ const Factor *factor = word[ m_factorTypesOrdered[0] ];
stream << factor->GetString();
for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index)
{
FactorType factorType = m_factorTypesOrdered[index];
- const Factor *factor = factorArray[factorType];
+ const Factor *factor = word[factorType];
stream << "|" << factor->GetString();
}
factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str());
- Word jointWord;
- jointWord.SetFactor(m_implFactor, factor);
+ Word* jointWord = new Word;
+ jointWord->SetFactor(m_implFactor, factor);
jointContext.push_back(jointWord);
}
@@ -125,6 +125,8 @@ public:
*/
// calc score on chunked phrase
float ret = m_lmImpl->GetValue(jointContext, finalState, len);
+
+ RemoveAllInColl(jointContext);
return ret;
}
diff --git a/moses/src/LanguageModelMultiFactor.cpp b/moses/src/LanguageModelMultiFactor.cpp
index 8dabb30a5..275e5a0e5 100644
--- a/moses/src/LanguageModelMultiFactor.cpp
+++ b/moses/src/LanguageModelMultiFactor.cpp
@@ -40,10 +40,10 @@ bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const
return false;
// whether phrase contains all factors in this LM
- const FactorArray &factorArray = phrase.GetFactorArray(0);
+ const Word &word = phrase.GetWord(0);
for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor)
{
- if (m_factorTypes[currFactor] && factorArray[currFactor] == NULL)
+ if (m_factorTypes[currFactor] && word[currFactor] == NULL)
return false;
}
return true;
diff --git a/moses/src/LanguageModelSRI.cpp b/moses/src/LanguageModelSRI.cpp
index be59f420a..2d499b771 100755
--- a/moses/src/LanguageModelSRI.cpp
+++ b/moses/src/LanguageModelSRI.cpp
@@ -132,7 +132,7 @@ float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
return FloorSRIScore(TransformSRIScore(p)); // log10->log
}
-float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int *len) const
+float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const
{
FactorType factorType = GetFactorType();
size_t count = contextFactor.size();
@@ -146,13 +146,13 @@ float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor
VocabIndex context[MAX_NGRAM_SIZE];
for (size_t i = 0 ; i < count - 1 ; i++)
{
- context[i] = GetLmID(contextFactor[count-2-i][factorType]);
+ context[i] = GetLmID((*contextFactor[count-2-i])[factorType]);
}
context[count-1] = Vocab_None;
- assert(contextFactor[count-1][factorType] != NULL);
+ assert((*contextFactor[count-1])[factorType] != NULL);
// call sri lm fn
- VocabIndex lmId= GetLmID(contextFactor[count-1][factorType]);
+ VocabIndex lmId= GetLmID((*contextFactor[count-1])[factorType]);
float ret = GetValue(lmId, context);
if (finalState) {
diff --git a/moses/src/LanguageModelSRI.h b/moses/src/LanguageModelSRI.h
index b43e4bc1f..b1a59f899 100755
--- a/moses/src/LanguageModelSRI.h
+++ b/moses/src/LanguageModelSRI.h
@@ -56,6 +56,6 @@ public:
, float weight
, size_t nGramOrder);
- virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
+ virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
};
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 48bdb0d80..013dbdc34 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -9,7 +9,6 @@ libmoses_a_SOURCES_TMP = \
DistortionOrientation.cpp \
DummyScoreProducers.cpp \
Factor.cpp \
- FactorArrayWrapper.cpp \
FactorCollection.cpp \
FactorTypeSet.cpp \
GenerationDictionary.cpp \
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h
index cf27b687a..03bc2cafd 100644
--- a/moses/src/PDTAimp.h
+++ b/moses/src/PDTAimp.h
@@ -96,7 +96,7 @@ public:
}
- void Factors2String(FactorArray const& w,std::string& s) const
+ void Factors2String(Word const& w,std::string& s) const
{
for(size_t j=0;j<m_input.size();++j)
{
@@ -155,7 +155,7 @@ public:
std::vector<std::string> srcString(src.GetSize());
// convert source Phrase into vector of strings
for(size_t i=0;i<srcString.size();++i)
- Factors2String(src.GetFactorArray(i),srcString[i]);
+ Factors2String(src.GetWord(i),srcString[i]);
// get target phrases in string representation
std::vector<StringTgtCand> cands;
@@ -268,9 +268,9 @@ public:
for(size_t k=0;k<factorStrings.size();++k)
{
std::vector<std::string> factors=Tokenize(*factorStrings[k],"|");
- FactorArray& fa=targetPhrase.AddWord();
+ Word& w=targetPhrase.AddWord();
for(size_t l=0;l<m_output.size();++l)
- fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
+ w[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
}
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
targetPhrase.SetSourcePhrase(srcPtr);
@@ -360,7 +360,7 @@ public:
{
const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
std::string s;
- Factors2String(w.GetFactorArray(),s);
+ Factors2String(w,s);
bool isEpsilon=(s=="" || s==EPSILON);
// do not start with epsilon (except at first position)
diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp
index 5d0c3ee07..fdd54d61d 100755
--- a/moses/src/Phrase.cpp
+++ b/moses/src/Phrase.cpp
@@ -1,4 +1,5 @@
// $Id$
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -27,20 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FactorCollection.h"
#include "Phrase.h"
#include "Util.h" //malloc() replacement
+#include "StaticData.h" // GetMaxNumFactors
using namespace std;
-std::vector<mempool*> Phrase::s_memPool;
+// std::vector<mempool*> Phrase::s_memPool;
Phrase::Phrase(const Phrase &copy)
:m_direction(copy.m_direction)
,m_phraseSize(copy.m_phraseSize)
,m_arraySize(copy.m_arraySize)
-,m_memPoolIndex(copy.m_memPoolIndex)
+//,m_memPoolIndex(copy.m_memPoolIndex)
+,m_words(copy.m_words)
{
- assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
- m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
- memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
}
Phrase& Phrase::operator=(const Phrase& x)
@@ -48,19 +48,12 @@ Phrase& Phrase::operator=(const Phrase& x)
if(this!=&x)
{
- if(m_factorArray)
- {
- assert(m_memPoolIndex<s_memPool.size());
- s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
- }
-
m_direction=x.m_direction;
m_phraseSize=x.m_phraseSize;
m_arraySize=x.m_arraySize;
- m_memPoolIndex=x.m_memPoolIndex;
+// m_memPoolIndex=x.m_memPoolIndex;
- m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
- memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));
+ m_words = x.m_words;
}
return *this;
}
@@ -70,55 +63,34 @@ Phrase::Phrase(FactorDirection direction)
: m_direction(direction)
, m_phraseSize(0)
, m_arraySize(ARRAY_SIZE_INCR)
- , m_memPoolIndex(0)
+// , m_memPoolIndex(0)
+ , m_words(ARRAY_SIZE_INCR)
{
- assert(m_memPoolIndex<s_memPool.size());
- m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
}
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
:m_direction(direction)
,m_phraseSize(mergeWords.size())
+,m_words(mergeWords.size())
{
- m_memPoolIndex = (m_phraseSize + ARRAY_SIZE_INCR - 1) / ARRAY_SIZE_INCR - 1;
- m_arraySize = (m_memPoolIndex + 1) * ARRAY_SIZE_INCR;
- m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
-
for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
{
- FactorArray &thisWord = m_factorArray[currPos];
- const Word &mergeWord = *mergeWords[currPos];
-
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
- {
- FactorType factorType = static_cast<FactorType>(currFactor);
- thisWord[currFactor] = mergeWord.GetFactor(factorType);
- }
+ m_words[currPos] = *mergeWords[currPos];
}
}
Phrase::~Phrase()
{
- // RZ:
- // will segFault if Phrase was default constructed and AddWord was never called
- // TODO not sure if this is really the intended behaviour
- // assertion failure is better than segFault, but if(m_factorArray) might be more appropriate
- //assert(m_factorArray);
- if(m_factorArray)
- {
- assert(m_memPoolIndex<s_memPool.size());
- assert((char*)m_factorArray);
- s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
- }
}
void Phrase::MergeFactors(const Phrase &copy)
{
assert(GetSize() == copy.GetSize());
size_t size = GetSize();
+ const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *factor = copy.GetFactor(currPos, factorType);
@@ -153,8 +125,8 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++)
{
- FactorArray &newWord = retPhrase.AddWord();
- Word::Copy(newWord, GetFactorArray(currPos));
+ Word &word = retPhrase.AddWord();
+ word = GetWord(currPos);
}
return retPhrase;
@@ -166,31 +138,21 @@ std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
stringstream strme;
for (size_t pos = 0 ; pos < GetSize() ; pos++)
{
- strme << Word::ToString(factorsToPrint, GetFactorArray(pos));
+ strme << GetWord(pos).ToString(factorsToPrint);
}
return strme.str();
}
-FactorArray &Phrase::AddWord()
+Word &Phrase::AddWord()
{
if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)
{ // need to expand array
- FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->allocate();
- memcpy(newArray, m_factorArray, m_phraseSize * sizeof(FactorArray));
- s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
-
- m_memPoolIndex++;
m_arraySize += ARRAY_SIZE_INCR;
- m_factorArray = newArray;
+ m_words.resize(m_arraySize);
}
- FactorArray &factorArray = m_factorArray[m_phraseSize];
- Word::Initialize(factorArray);
-
- m_phraseSize++;
-
- return factorArray;
+ return m_words[m_phraseSize++];
}
vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter)
@@ -233,13 +195,13 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++)
{
// add word this phrase
- FactorArray &factorArray = AddWord();
+ Word &word = AddWord();
for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++)
{
FactorType factorType = factorOrder[currFactorIndex];
const string &factorStr = phraseVector[phrasePos][currFactorIndex];
const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr);
- factorArray[factorType] = factor;
+ word[factorType] = factor;
}
}
}
@@ -270,8 +232,9 @@ bool Phrase::operator < (const Phrase &compare) const
{
size_t minSize = std::min( thisSize , compareSize );
+ const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
// taken from word.Compare()
- for (size_t i = 0 ; i < MAX_NUM_FACTORS ; i++)
+ for (size_t i = 0 ; i < maxNumFactors ; i++)
{
FactorType factorType = static_cast<FactorType>(i);
@@ -346,9 +309,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
const size_t size = GetSize();
+ const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection());
for (size_t currPos = 0 ; currPos < size ; currPos++)
{
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++)
{
FactorType factorType = static_cast<FactorType>(currFactor);
const Factor *thisFactor = GetFactor(currPos, factorType)
@@ -389,6 +353,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
void Phrase::InitializeMemPool()
{
+#if 0
s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
@@ -399,15 +364,18 @@ void Phrase::InitializeMemPool()
for (size_t i = 8 ; i < 30 ; ++i)
s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
+#endif
}
void Phrase::FinalizeMemPool()
{
+#if 0
std::vector<mempool*>::iterator iter;
for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
{
delete *iter;
}
+#endif
}
TO_STRING_BODY(Phrase);
@@ -418,8 +386,8 @@ ostream& operator<<(ostream& out, const Phrase& phrase)
// out << "(size " << phrase.GetSize() << ") ";
for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++)
{
- const FactorArray &factorArray = phrase.GetFactorArray(pos);
- out << Word::ToString(factorArray);
+ const Word &word = phrase.GetWord(pos);
+ out << word;
}
return out;
}
diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h
index 321741615..4d0986098 100755
--- a/moses/src/Phrase.h
+++ b/moses/src/Phrase.h
@@ -1,4 +1,5 @@
// $Id$
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -35,13 +36,13 @@ class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
private:
- static std::vector<mempool*> s_memPool;
+// static std::vector<mempool*> s_memPool;
FactorDirection m_direction;
- size_t m_phraseSize, //number of words
- m_arraySize,
- m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
- FactorArray *m_factorArray;
+ size_t m_phraseSize; //number of words
+ size_t m_arraySize;
+// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
+ std::vector<Word> m_words;
public:
static void InitializeMemPool();
@@ -84,35 +85,37 @@ public:
{
return m_phraseSize;
}
- inline const FactorArray &GetFactorArray(size_t pos) const
+ inline const Word &GetWord(size_t pos) const
{
- return m_factorArray[pos];
+ return m_words[pos];
}
- inline FactorArray &GetFactorArray(size_t pos)
+ inline Word &GetWord(size_t pos)
{
- return m_factorArray[pos];
+ return m_words[pos];
}
inline const Factor *GetFactor(size_t pos, FactorType factorType) const
{
- FactorArray &ptr = m_factorArray[pos];
+ const Word &ptr = m_words[pos];
return ptr[factorType];
}
inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor)
{
- FactorArray &ptr = m_factorArray[pos];
+ Word &ptr = m_words[pos];
ptr[factorType] = factor;
}
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
, const std::vector<FactorType> &inputFactor) const;
- FactorArray &AddWord();
+ Word &AddWord();
Phrase GetSubString(const WordsRange &wordsRange) const;
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
- void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());}
+ void push_back(Word const& w) {
+ AddWord() = w;
+ }
TO_STRING;
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index c55aafac6..3f4cfbf28 100755
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -122,7 +122,7 @@ TargetPhraseCollection *PhraseDictionary::CreateTargetPhraseCollection(const Phr
PhraseDictionaryNode *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
- Word word(source.GetFactorArray(pos));
+ const Word& word = source.GetWord(pos);
currNode = currNode->GetOrCreateChild(word);
if (currNode == NULL)
return NULL;
@@ -144,7 +144,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const
const PhraseDictionaryNode *currNode = &m_collection;
for (size_t pos = 0 ; pos < size ; ++pos)
{
- Word word(source.GetFactorArray(pos));
+ const Word& word = source.GetWord(pos);
currNode = currNode->GetChild(word);
if (currNode == NULL)
return NULL;
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index 38af542d7..b33d700cb 100755
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -50,9 +50,9 @@ class Sentence : public Phrase, public InputType
{
return Phrase::GetStringRep(factorsToPrint);
}
- const FactorArray& GetFactorArray(size_t pos) const
+ const Word& GetWord(size_t pos) const
{
- return Phrase::GetFactorArray(pos);
+ return Phrase::GetWord(pos);
}
size_t GetSize() const
{
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 9edbab89f..813c6aef2 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1,4 +1,5 @@
// $Id$
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -42,6 +43,22 @@ using namespace std;
extern Timer timer;
+static size_t CalcMax(size_t x, const vector<size_t>& y) {
+ size_t max = x;
+ for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+ if (*i > max) max = *i;
+ return max;
+}
+
+static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z) {
+ size_t max = x;
+ for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i)
+ if (*i > max) max = *i;
+ for (vector<size_t>::const_iterator i=z.begin(); i != z.end(); ++i)
+ if (*i > max) max = *i;
+ return max;
+}
+
StaticData* StaticData::s_instance(0);
StaticData::StaticData()
@@ -57,6 +74,9 @@ StaticData::StaticData()
,m_computeLMBackoffStats(false)
,m_factorDelimiter("|") // default delimiter between factors
{
+ m_maxFactorIdx[0] = 0; // source side
+ m_maxFactorIdx[1] = 0; // target side
+
s_instance = this;
// memory pools
@@ -300,9 +320,6 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// initialize n-gram order for each factor. populated only by factored lm
- for(size_t i=0; i < MAX_NUM_FACTORS ; i++)
- m_maxNgramOrderForFactor[i] = 0;
-
const vector<string> &lmVector = m_parameter.GetParam("lmodel-file");
for(size_t i=0; i<lmVector.size(); i++)
@@ -360,6 +377,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
bool oldFormat = (token.size() == 3);
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
,output = Tokenize<FactorType>(token[1], ",");
+ m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output);
string filePath;
size_t numFeatures = 1;
if (oldFormat)
@@ -536,6 +554,9 @@ void StaticData::LoadPhraseTables(bool filter
//characteristics of the phrase table
vector<FactorType> input = Tokenize<FactorType>(token[0], ",")
,output = Tokenize<FactorType>(token[1], ",");
+ m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input);
+ m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output);
+ m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1;
string filePath= token[3];
size_t noScoreComponent = Scan<size_t>(token[2]);
// weights for this phrase dictionary
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index bcd59ca11..38cf1a525 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -80,7 +80,6 @@ protected:
std::vector<std::string> m_mySQLParam;
InputOutput *m_inputOutput;
bool m_fLMsLoaded, m_labeledNBestList;
- size_t m_maxNgramOrderForFactor[MAX_NUM_FACTORS];
/***
* false = treat unknown words as unknowns, and translate them as themselves;
* true = drop (ignore) them
@@ -88,6 +87,7 @@ protected:
bool m_dropUnknown;
bool m_wordDeletionEnabled;
+
int m_inputType;
unsigned m_numInputScores;
@@ -102,7 +102,9 @@ protected:
bool m_computeLMBackoffStats;
mutable std::auto_ptr<SentenceStats> m_sentenceStats;
- std::string m_factorDelimiter;
+ std::string m_factorDelimiter; //! by default, |, but it can be changed
+ size_t m_maxFactorIdx[2]; //! largest factor index in use on the source [0] and target [1] side (factor count is this value + 1)
+ size_t m_maxNumFactors; //! max number of factors on both source and target sides
public:
StaticData();
@@ -299,4 +301,6 @@ public:
bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;}
const std::string& GetFactorDelimiter() const {return m_factorDelimiter;}
+ size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; }
+ size_t GetMaxNumFactors() const { return m_maxNumFactors; }
};
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
index 06fadff9b..ff1e2604d 100644
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@@ -114,9 +114,9 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
const size_t len = GetSize();
for (size_t currPos = 0 ; currPos < len ; currPos++)
{
- const FactorArray &inputWord = inputPhrase.GetFactorArray(currPos);
- FactorArray &cloneWord = clone->GetFactorArray(currPos);
- Word::Merge(cloneWord, inputWord);
+ const Word &inputWord = inputPhrase.GetWord(currPos);
+ Word &cloneWord = clone->GetWord(currPos);
+ cloneWord.Merge(inputWord);
currWord++;
}
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 64c971f5f..8a1bd9508 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -332,7 +332,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
/** special handling of unknown words: add special translation (or drop) */
-void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourceWord,
+void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
size_t sourcePos
, FactorCollection &factorCollection)
{
@@ -356,7 +356,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
{
// add to dictionary
TargetPhrase targetPhrase(Output);
- FactorArray &targetWord = targetPhrase.AddWord();
+ Word &targetWord = targetPhrase.AddWord();
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index d8f9ef4db..141e8d6cf 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -36,6 +36,7 @@ class GenerationDictionary;
class InputType;
class LMList;
class FactorMask;
+class Word;
typedef std::vector<const TranslationOption*> TranslationOptionList;
@@ -70,7 +71,7 @@ protected:
, size_t startPos, size_t endPos, bool observeTableLimit );
void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection);
- virtual void ProcessOneUnknownWord(const FactorArray &sourceWord
+ virtual void ProcessOneUnknownWord(const Word &sourceWord
, size_t sourcePos
, FactorCollection &factorCollection);
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp
index d1e506935..035212d84 100644
--- a/moses/src/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/src/TranslationOptionCollectionConfusionNet.cpp
@@ -19,7 +19,7 @@ ProcessUnknownWord( size_t sourcePos
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i)
- ProcessOneUnknownWord(i->first.GetFactorArray(),sourcePos,factorCollection);
+ ProcessOneUnknownWord(i->first,sourcePos,factorCollection);
}
diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp
index b456239a2..037c451e2 100644
--- a/moses/src/TranslationOptionCollectionText.cpp
+++ b/moses/src/TranslationOptionCollectionText.cpp
@@ -36,6 +36,6 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos
, FactorCollection &factorCollection)
{
- const FactorArray &sourceWord = m_source.GetFactorArray(sourcePos);
+ const Word &sourceWord = m_source.GetWord(sourcePos);
ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection);
}
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index a896ded97..b67724369 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -81,8 +81,8 @@ const size_t MAX_NUM_FACTORS = 4;
enum FactorDirection
{
- Input,
- Output
+ Input, //! Source factors
+ Output //! Target factors
};
enum DecodeType
@@ -144,5 +144,3 @@ enum DictionaryFind
// typedef
typedef size_t FactorType;
-class Factor;
-typedef const Factor * FactorArray[MAX_NUM_FACTORS];
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
index d28542fe7..5c8384316 100755
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@@ -1,4 +1,5 @@
// $Id$
+// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -28,25 +29,23 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
Word::Word(const Word &copy)
-:FactorArrayWrapper()
+#ifdef DYNAMIC_FACTOR_ARRAY
+: m_factorArray(copy.m_factorArray)
+#endif
{ // deep copy
- m_factorArrayPtr = &m_factorArray;
- Word::Copy(m_factorArray, copy.m_factorArray);
+#ifndef DYNAMIC_FACTOR_ARRAY
+ memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
+#endif
}
Word::Word()
+#ifdef DYNAMIC_FACTOR_ARRAY
+: m_factorArray(MAX_NUM_FACTORS, 0)
+#endif
{
- m_factorArrayPtr = &m_factorArray;
- Word::Initialize(m_factorArray);
-}
-
-Word::Word(const FactorArray &factorArray)
-{
- m_factorArrayPtr = &m_factorArray;
- for (size_t factor = 0 ; factor < MAX_NUM_FACTORS ; factor++)
- {
- m_factorArray[factor] = factorArray[factor];
- }
+#ifndef DYNAMIC_FACTOR_ARRAY
+ memset(m_factorArray, 0, sizeof(FactorArray));
+#endif
}
Word::~Word()
@@ -54,7 +53,7 @@ Word::~Word()
}
// static
-int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
+int Word::Compare(const Word &targetWord, const Word &sourceWord)
{
for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++)
{
@@ -73,51 +72,20 @@ int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord)
}
-void Word::Copy(FactorArray &target, const FactorArray &source)
-{
- memcpy(target, source, sizeof(FactorArray));
-}
-
-void Word::Initialize(FactorArray &factorArray)
-{
- memset(factorArray, 0, sizeof(FactorArray));
-}
-
-void Word::Merge(FactorArray &targetWord, const FactorArray &sourceWord)
+void Word::Merge(const Word &sourceWord)
{
for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
{
- const Factor *sourcefactor = sourceWord[currFactor]
- ,*targetFactor = targetWord[currFactor];
+ const Factor *sourcefactor = sourceWord.m_factorArray[currFactor]
+ ,*targetFactor = this ->m_factorArray[currFactor];
if (targetFactor == NULL && sourcefactor != NULL)
{
- targetWord[currFactor] = sourcefactor;
- }
- }
-}
-
-std::string Word::ToString(const FactorArray &factorArray)
-{
- stringstream strme;
-
- const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter();
- bool firstPass = true;
- // TODO- don't loop over MAX_NUM_FACTORS here, just use the ones that
- // actually participate in the xltn process.
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
- {
- const Factor *factor = factorArray[currFactor];
- if (factor != NULL)
- {
- if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
- strme << *factor;
+ m_factorArray[currFactor] = sourcefactor;
}
}
- strme << " ";
- return strme.str();
}
-std::string Word::ToString(const vector<FactorType> factorType, const FactorArray &factorArray)
+std::string Word::ToString(const vector<FactorType> factorType) const
{
stringstream strme;
assert(factorType.size() <= MAX_NUM_FACTORS);
@@ -125,7 +93,7 @@ std::string Word::ToString(const vector<FactorType> factorType, const FactorArra
bool firstPass = true;
for (unsigned int i = 0 ; i < factorType.size() ; i++)
{
- const Factor *factor = factorArray[factorType[i]];
+ const Factor *factor = m_factorArray[factorType[i]];
if (factor != NULL)
{
if (firstPass) { firstPass = false; } else { strme << factorDelimiter; }
diff --git a/moses/src/Word.h b/moses/src/Word.h
index 0bfb0c106..5cbf2bbcf 100755
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@@ -27,59 +27,68 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Factor.h"
#include "Util.h"
-#include "FactorArrayWrapper.h"
+
+#undef DYNAMIC_FACTOR_ARRAY
class Phrase;
/***
* hold a set of factors for a single word
- *
- * TODO either replace all uses of FactorArray with Word or vice versa; don't only use the wrapper in half of cases!
*/
-class Word : public FactorArrayWrapper
+class Word
{
friend std::ostream& operator<<(std::ostream&, const Word&);
protected:
- FactorArray m_factorArray;
+
+#ifndef DYNAMIC_FACTOR_ARRAY
+ typedef const Factor * FactorArray[MAX_NUM_FACTORS];
+#else
+ typedef std::vector<const Factor*> FactorArray;
+#endif
+
+ FactorArray m_factorArray;
public:
/**
* deep copy
*/
Word(const Word &copy);
- Word(const FactorArray &factorArray);
Word();
~Word();
+ const Factor*& operator[](FactorType index) {
+ return m_factorArray[index];
+ }
- // why is this needed ? it should be inherited
- const FactorArray &GetFactorArray() const
- {
- return m_factorArray;
+ const Factor * const & operator[](FactorType index) const {
+ return m_factorArray[index];
}
- inline FactorArray &GetFactorArray()
- {
- return m_factorArray;
+ inline const Factor* GetFactor(FactorType factorType) const {
+ return m_factorArray[factorType];
}
inline void SetFactor(FactorType factorType, const Factor *factor)
{
m_factorArray[factorType] = factor;
}
+ void Merge(const Word &sourceWord);
+
+ std::string ToString(const std::vector<FactorType> factorType) const;
TO_STRING;
/* static functions */
- // FactorArray
- static void Copy(FactorArray &target, const FactorArray &source);
- static void Initialize(FactorArray &factorArray);
-
/***
* wherever the source word has a given factor that the target word is missing, add it to the target word
*/
- static void Merge(FactorArray &targetWord, const FactorArray &sourceWord);
- static std::string ToString(const FactorArray &factorArray);
- static std::string ToString(const std::vector<FactorType> factorType, const FactorArray &factorArray);
- static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord);
+ static int Compare(const Word &targetWord, const Word &sourceWord);
+
+ inline bool operator< (const Word &compare) const
+ { // needed to store word in GenerationDictionary map
+ // uses comparison of FactorKey
+ // 'proper' comparison, not address/id comparison
+ return Compare(*this, compare) < 0;
+ }
+
};