diff options
author | redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-09-26 01:35:10 +0400 |
---|---|---|
committer | redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-09-26 01:35:10 +0400 |
commit | 441b147b3531bbefb53d9290dbf7595206e9a423 (patch) | |
tree | fe727a99134e7008e92ce90c9489cc6c1076226a /moses | |
parent | 27fe63219404d339ebd406da2cbee1703e84ae8e (diff) |
Get rid of FactorArrayWrapper/FactorArray and use only Word. Memory pool is currently disabled, but the net cleanup resulted in better performance despite this (there are fewer copies of FactorArray -> Word now).
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@827 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
35 files changed, 232 insertions, 402 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp index 0b29cc35d..aa5062a10 100644 --- a/moses/src/ConfusionNet.cpp +++ b/moses/src/ConfusionNet.cpp @@ -55,7 +55,7 @@ ConfusionNet::ConfusionNet(Sentence const& s) { data.resize(s.GetSize()); for(size_t i=0;i<s.GetSize();++i) - data[i].push_back(std::make_pair(Word(s.GetFactorArray(i)),0.0)); + data[i].push_back(std::make_pair(s.GetWord(i),0.0)); } @@ -185,7 +185,7 @@ std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint) return ""; } #pragma warning(disable:4716) -const FactorArray& ConfusionNet::GetFactorArray(size_t) const { +const Word& ConfusionNet::GetWord(size_t) const { std::cerr<<"ERROR: call to ConfusionNet::GetFactorArray\n"; abort(); } diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h index 2e5aaa673..03101e829 100644 --- a/moses/src/ConfusionNet.h +++ b/moses/src/ConfusionNet.h @@ -41,7 +41,7 @@ class ConfusionNet : public InputType { Phrase GetSubString(const WordsRange&) const; //TODO not defined std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined - const FactorArray& GetFactorArray(size_t pos) const; + const Word& GetWord(size_t pos) const; TargetPhraseCollection const* CreateTargetPhraseCollection(PhraseDictionaryBase const& d,const WordsRange& r) const; diff --git a/moses/src/DecodeStepGeneration.cpp b/moses/src/DecodeStepGeneration.cpp index 0f9bb5f4d..998e6dc98 100644 --- a/moses/src/DecodeStepGeneration.cpp +++ b/moses/src/DecodeStepGeneration.cpp @@ -107,10 +107,10 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp { // generatable factors for this word to be put in wordList WordList &wordList = wordListVector[wordListVectorPos]; - const FactorArray &factorArray = targetPhrase.GetFactorArray(currPos); + const Word &word = targetPhrase.GetWord(currPos); // consult dictionary for possible generations for this word - const OutputWordCollection *wordColl = 
generationDictionary.FindWord(factorArray); + const OutputWordCollection *wordColl = generationDictionary.FindWord(word); if (wordColl == NULL) { // word not found in generation dictionary diff --git a/moses/src/FactorArrayWrapper.cpp b/moses/src/FactorArrayWrapper.cpp deleted file mode 100644 index f205a4a2c..000000000 --- a/moses/src/FactorArrayWrapper.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. 
- -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "FactorArrayWrapper.h" -#include "Util.h" -#include "Word.h" - -using namespace std; - -FactorArrayWrapper::~FactorArrayWrapper() {} - -int FactorArrayWrapper::Compare(const FactorArrayWrapper &compare) const -{ - return Compare(GetFactorArray(), compare.GetFactorArray()); -} - -// static functions -int FactorArrayWrapper::Compare(const FactorArray &targetWord, const FactorArray &sourceWord) -{ - for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++) - { - const Factor *targetFactor = targetWord[factorType] - ,*sourceFactor = sourceWord[factorType]; - - if (targetFactor == NULL || sourceFactor == NULL) - { - continue; - } - int result = targetFactor->Compare(*sourceFactor); - if ( result ) - return result; - } - return 0; - -} - -TO_STRING_BODY(FactorArrayWrapper); - -// friend -ostream& operator<<(ostream& out, const FactorArrayWrapper& wrapper) -{ - out << Word::ToString(*wrapper.m_factorArrayPtr); - return out; -} diff --git a/moses/src/FactorArrayWrapper.h b/moses/src/FactorArrayWrapper.h deleted file mode 100644 index 740dfbdeb..000000000 --- a/moses/src/FactorArrayWrapper.h +++ /dev/null @@ -1,82 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. 
- -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once - -#include <iostream> -#include "TypeDef.h" -#include "Factor.h" - -class FactorArrayWrapper -{ - friend std::ostream& operator<<(std::ostream&, const FactorArrayWrapper&); - -protected: - const FactorArray *m_factorArrayPtr; -public: - FactorArrayWrapper() {} - FactorArrayWrapper(const FactorArray &factorArray) - :m_factorArrayPtr(&factorArray) {} - virtual ~FactorArrayWrapper(); - - FactorArrayWrapper& operator=(const FactorArrayWrapper &other) - { - if(this != &other) - { - m_factorArrayPtr = other.m_factorArrayPtr; - } - return *this; - } - - const Factor *operator[](size_t index) const - { - return (*m_factorArrayPtr)[index]; - } - - virtual const FactorArray &GetFactorArray() const - { - return *m_factorArrayPtr; - } - - inline const Factor *GetFactor(FactorType factorType) const - { - return (*m_factorArrayPtr)[factorType]; - } - - int Compare(const FactorArrayWrapper &compare) const; - // -1 = less than - // +1 = more than - // 0 = same - - inline bool operator< (const FactorArrayWrapper &compare) const - { // needed to store word in GenerationDictionary map - // uses comparison of FactorKey - // 'proper' comparison, not address/id comparison - return Compare(compare) < 0; - } - - TO_STRING; - - //statics - static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord); - -}; - diff --git a/moses/src/GenerationDictionary.cpp b/moses/src/GenerationDictionary.cpp index f5fc85691..4e91fdbfd 100755 --- 
a/moses/src/GenerationDictionary.cpp +++ b/moses/src/GenerationDictionary.cpp @@ -62,7 +62,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input exit(1); } - m_filename = filePath; + m_filename = filePath; string line; size_t lineNum = 0; while(getline(inFile, line)) @@ -71,7 +71,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input vector<string> token = Tokenize( line ); // add each line in generation file into class - Word *inputWord = new Word(); + Word *inputWord = new Word(); // deleted in destructor Word outputWord; // create word with certain factors filled out @@ -113,7 +113,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input GenerationDictionary::~GenerationDictionary() { - std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator iter; + std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator iter; for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter) { delete iter->first; @@ -130,13 +130,12 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const return "Generation score, file=" + m_filename; } -const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const +const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const { const OutputWordCollection *ret; - FactorArrayWrapper wrapper(factorArray); - std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer>::const_iterator - iter = m_collection.find(&wrapper); + std::map<const Word* , OutputWordCollection, WordComparer>::const_iterator + iter = m_collection.find(&word); if (iter == m_collection.end()) { // can't find source phrase ret = NULL; diff --git a/moses/src/GenerationDictionary.h b/moses/src/GenerationDictionary.h index 9248fdd3c..d97f5d679 100755 --- a/moses/src/GenerationDictionary.h +++ b/moses/src/GenerationDictionary.h @@ -31,10 +31,10 @@ 
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class FactorCollection; -struct FactorArrayWrapperComparer +struct WordComparer { //! returns true if hypoA can be recombined with hypoB - bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const + bool operator()(const Word *a, const Word *b) const { return *a < *b; } @@ -47,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection; class GenerationDictionary : public Dictionary, public ScoreProducer { protected: - std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection; + std::map<const Word* , OutputWordCollection, WordComparer> m_collection; // 1st = source // 2nd = target std::string m_filename; @@ -75,6 +75,6 @@ public: { return m_collection.size(); } - const OutputWordCollection *FindWord(const FactorArray &factorArray) const; + const OutputWordCollection *FindWord(const Word &word) const; }; diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp index c1133dd4c..0365fa61d 100755 --- a/moses/src/Hypothesis.cpp +++ b/moses/src/Hypothesis.cpp @@ -214,14 +214,14 @@ void Hypothesis::CalcLMScore(const LMList &languageModels) (*_lmstats)[lmIdx].resize(m_currTargetWordsRange.GetWordsCount(), 0); // 1st n-gram - vector<FactorArrayWrapper> contextFactor(nGramOrder); + vector<const Word*> contextFactor(nGramOrder); size_t index = 0; for (int currPos = (int) startPos - (int) nGramOrder + 1 ; currPos <= (int) startPos ; currPos++) { if (currPos >= 0) - contextFactor[index++] = GetFactorArray(currPos); + contextFactor[index++] = &GetWord(currPos); else - contextFactor[index++] = languageModel.GetSentenceStartArray(); + contextFactor[index++] = &languageModel.GetSentenceStartArray(); } lmScore = languageModel.GetValue(contextFactor); if (_lmstats) { languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]); } @@ -237,7 +237,7 @@ void Hypothesis::CalcLMScore(const LMList 
&languageModels) contextFactor[i] = contextFactor[i + 1]; // add last factor - contextFactor.back() = GetFactorArray(currPos); + contextFactor.back() = &GetWord(currPos); lmScore += languageModel.GetValue(contextFactor); if (_lmstats) @@ -249,15 +249,15 @@ void Hypothesis::CalcLMScore(const LMList &languageModels) if (m_sourceCompleted.IsComplete()) { const size_t size = GetSize(); - contextFactor.back() = languageModel.GetSentenceEndArray(); + contextFactor.back() = &languageModel.GetSentenceEndArray(); for (size_t i = 0 ; i < nGramOrder - 1 ; i ++) { int currPos = size - nGramOrder + i + 1; if (currPos < 0) - contextFactor[i] = languageModel.GetSentenceStartArray(); + contextFactor[i] = &languageModel.GetSentenceStartArray(); else - contextFactor[i] = GetFactorArray((size_t)currPos); + contextFactor[i] = &GetWord((size_t)currPos); } if (_lmstats) { (*_lmstats)[lmIdx].resize((*_lmstats)[lmIdx].size() + 1); // extra space for the last call @@ -268,7 +268,7 @@ void Hypothesis::CalcLMScore(const LMList &languageModels) for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) { for (size_t i = 0 ; i < nGramOrder - 1 ; i++) contextFactor[i] = contextFactor[i + 1]; - contextFactor.back() = GetFactorArray(currPos); + contextFactor.back() = &GetWord(currPos); if (_lmstats) languageModel.GetState(contextFactor, &(*_lmstats)[lmIdx][nLmCallCount++]); } @@ -437,7 +437,8 @@ std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factor std::string Hypothesis::GetSourcePhraseStringRep() const { vector<FactorType> allFactors; - for(size_t i=0; i < MAX_NUM_FACTORS; i++) + const size_t maxSourceFactors = StaticData::Instance()->GetMaxNumFactors(Input); + for(size_t i=0; i < maxSourceFactors; i++) { allFactors.push_back(i); } @@ -446,7 +447,8 @@ std::string Hypothesis::GetSourcePhraseStringRep() const std::string Hypothesis::GetTargetPhraseStringRep() const { vector<FactorType> allFactors; - for(size_t i=0; i < MAX_NUM_FACTORS; i++) + const size_t 
maxTargetFactors = StaticData::Instance()->GetMaxNumFactors(Output); + for(size_t i=0; i < maxTargetFactors; i++) { allFactors.push_back(i); } diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h index 26694ad4d..56bc1a0fe 100755 --- a/moses/src/Hypothesis.h +++ b/moses/src/Hypothesis.h @@ -1,4 +1,5 @@ // $Id$ +// vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder @@ -167,19 +168,19 @@ public: std::string GetSourcePhraseStringRep() const; std::string GetTargetPhraseStringRep() const; - // curr - pos is relative from CURRENT hypothesis's starting ind ex - // (ie, start of sentence would be some negative number, which is - // not allowed- USE WITH CAUTION) - inline const FactorArray &GetCurrFactorArray(size_t pos) const + /** curr - pos is relative from CURRENT hypothesis's starting index + * (ie, start of sentence would be some negative number, which is + * not allowed- USE WITH CAUTION) */ + inline const Word &GetCurrWord(size_t pos) const { - return m_targetPhrase.GetFactorArray(pos); + return m_targetPhrase.GetWord(pos); } inline const Factor *GetCurrFactor(size_t pos, FactorType factorType) const { return m_targetPhrase.GetFactor(pos, factorType); } - // recursive - pos is relative from start of sentence - inline const FactorArray &GetFactorArray(size_t pos) const + /** recursive - pos is relative from start of sentence */ + inline const Word &GetWord(size_t pos) const { const Hypothesis *hypo = this; while (pos < hypo->GetCurrTargetWordsRange().GetStartPos()) @@ -187,11 +188,11 @@ public: hypo = hypo->GetPrevHypo(); assert(hypo != NULL); } - return hypo->GetCurrFactorArray(pos - hypo->GetCurrTargetWordsRange().GetStartPos()); + return hypo->GetCurrWord(pos - hypo->GetCurrTargetWordsRange().GetStartPos()); } inline const Factor* GetFactor(size_t pos, FactorType factorType) const { - return GetFactorArray(pos)[factorType]; + return GetWord(pos)[factorType]; } /*** diff 
--git a/moses/src/InputType.h b/moses/src/InputType.h index 6681caa3e..0e3f4f8a5 100644 --- a/moses/src/InputType.h +++ b/moses/src/InputType.h @@ -42,7 +42,7 @@ protected: virtual Phrase GetSubString(const WordsRange&) const =0; // virtual std::string GetStringRep(const WordsRange&) const=0; - virtual const FactorArray& GetFactorArray(size_t pos) const=0; + virtual const Word& GetWord(size_t pos) const=0; TO_STRING; diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp index 9398e2513..01336700d 100755 --- a/moses/src/LanguageModel.cpp +++ b/moses/src/LanguageModel.cpp @@ -39,9 +39,6 @@ LanguageModel::LanguageModel(bool registerScore) { if (registerScore) const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this); - - Word::Initialize(m_sentenceStartArray); - Word::Initialize(m_sentenceEndArray); } LanguageModel::~LanguageModel() {} @@ -59,19 +56,19 @@ void LanguageModel::CalcScore(const Phrase &phrase ngramScore = 0; size_t phraseSize = phrase.GetSize(); - vector<FactorArrayWrapper> contextFactor; + vector<const Word*> contextFactor; contextFactor.reserve(m_nGramOrder); // start of sentence for (size_t currPos = 0 ; currPos < m_nGramOrder - 1 && currPos < phraseSize ; currPos++) { - contextFactor.push_back(phrase.GetFactorArray(currPos)); + contextFactor.push_back(&phrase.GetWord(currPos)); fullScore += GetValue(contextFactor); } if (phraseSize >= m_nGramOrder) { - contextFactor.push_back(phrase.GetFactorArray(m_nGramOrder - 1)); + contextFactor.push_back(&phrase.GetWord(m_nGramOrder - 1)); ngramScore = GetValue(contextFactor); } @@ -82,14 +79,14 @@ void LanguageModel::CalcScore(const Phrase &phrase { contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1]; } - contextFactor[m_nGramOrder - 1] = phrase.GetFactorArray(currPos); + contextFactor[m_nGramOrder - 1] = &phrase.GetWord(currPos); float partScore = GetValue(contextFactor); ngramScore += partScore; } fullScore += ngramScore; } 
-LanguageModel::State LanguageModel::GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len) const +LanguageModel::State LanguageModel::GetState(const std::vector<const Word*> &contextFactor, unsigned int* len) const { State state; unsigned int dummy; diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h index ad3d48bf4..dd21c133d 100755 --- a/moses/src/LanguageModel.h +++ b/moses/src/LanguageModel.h @@ -39,7 +39,7 @@ protected: float m_weight; std::string m_filename; size_t m_nGramOrder; - FactorArray m_sentenceStartArray, m_sentenceEndArray; + Word m_sentenceStartArray, m_sentenceEndArray; LanguageModel(bool registerScore); @@ -59,19 +59,19 @@ public: void CalcScore(const Phrase &phrase , float &fullScore , float &ngramScore) const; - virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0; + virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const = 0; - State GetState(const std::vector<FactorArrayWrapper> &contextFactor, unsigned int* len = 0) const; + State GetState(const std::vector<const Word*> &contextFactor, unsigned int* len = 0) const; size_t GetNGramOrder() const { return m_nGramOrder; } - const FactorArray &GetSentenceStartArray() const + const Word &GetSentenceStartArray() const { return m_sentenceStartArray; } - const FactorArray &GetSentenceEndArray() const + const Word &GetSentenceEndArray() const { return m_sentenceEndArray; } diff --git a/moses/src/LanguageModelChunking.h b/moses/src/LanguageModelChunking.h index 3c05f2aa4..fd1c0c224 100644 --- a/moses/src/LanguageModelChunking.h +++ b/moses/src/LanguageModelChunking.h @@ -64,7 +64,7 @@ public: m_lmImpl->Load(fileName, factorCollection, m_factorType, weight, nGramOrder); } - float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const + float 
GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const { if (contextFactor.size() == 0) { @@ -76,30 +76,30 @@ public: TRACE_ERR(std::endl); */ // only process context where last word is a word we want - const Factor *factor = contextFactor.back()[m_factorType]; + const Factor *factor = (*contextFactor.back())[m_factorType]; std::string strWord = factor->GetString(); if (strWord.find("???") == 0) return 0; // add last word - std::vector<FactorArrayWrapper> chunkContext; - Word chunkWord; - chunkWord.SetFactor(m_factorType, factor); + std::vector<const Word*> chunkContext; + Word* chunkWord = new Word; + chunkWord->SetFactor(m_factorType, factor); chunkContext.push_back(chunkWord); // create context in reverse 'cos we skip words we don't want for (int currPos = (int)contextFactor.size() - 2 ; currPos >= 0 && chunkContext.size() < m_realNGramOrder ; --currPos ) { - const FactorArrayWrapper &factorArray = contextFactor[currPos]; - factor = factorArray[m_factorType]; + const Word &word = *contextFactor[currPos]; + factor = word[m_factorType]; std::string strWord = factor->GetString(); bool skip = strWord.find("???") == 0; if (skip) continue; // add word to chunked context - Word chunkWord; - chunkWord.SetFactor(m_factorType, factor); + Word* chunkWord = new Word; + chunkWord->SetFactor(m_factorType, factor); chunkContext.push_back(chunkWord); } @@ -112,7 +112,9 @@ public: */ // calc score on chunked phrase float ret = m_lmImpl->GetValue(chunkContext, finalState, len); - + + RemoveAllInColl(chunkContext); + return ret; } }; diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp index 3ef1a464b..517a592c1 100755 --- a/moses/src/LanguageModelIRST.cpp +++ b/moses/src/LanguageModelIRST.cpp @@ -128,7 +128,7 @@ int LanguageModelIRST::GetLmID( const std::string &str ) const return m_lmtb->dict->encode( str.c_str() ); } -float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> 
&contextFactor, State* finalState, unsigned int* len) const +float LanguageModelIRST::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const { unsigned int dummy; if (!len) { len = &dummy; } @@ -137,24 +137,24 @@ float LanguageModelIRST::GetValue(const vector<FactorArrayWrapper> &contextFacto // set up context size_t count = contextFactor.size(); - m_lmtb_ng->size=0; - if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd); - if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart); + m_lmtb_ng->size=0; + if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd); + if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart); for (size_t i = 0 ; i < count ; i++) { - int lmId = GetLmID(contextFactor[i][factorType]); - m_lmtb_ng->pushc(lmId); + int lmId = GetLmID((*contextFactor[i])[factorType]); + m_lmtb_ng->pushc(lmId); } if (finalState){ - *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng); + *finalState=(State *)m_lmtb->cmaxsuffptr(*m_lmtb_ng); // back off stats not currently available *len = 0; } - return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng)); + return TransformIRSTScore(m_lmtb->clprob(*m_lmtb_ng)); } diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h index 9c465c41e..d7c04092a 100755 --- a/moses/src/LanguageModelIRST.h +++ b/moses/src/LanguageModelIRST.h @@ -67,7 +67,7 @@ public: , float weight , size_t nGramOrder); - virtual float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len=0) const; + virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const; const void CleanUpAfterSentenceProcessing(); const void InitializeBeforeSentenceProcessing(); diff --git a/moses/src/LanguageModelJoint.h b/moses/src/LanguageModelJoint.h index 59696577d..5030ea5df 100644 --- a/moses/src/LanguageModelJoint.h +++ 
b/moses/src/LanguageModelJoint.h @@ -79,7 +79,7 @@ public: m_lmImpl->Load(fileName, factorCollection, m_implFactor, weight, nGramOrder); } - float GetValue(const std::vector<FactorArrayWrapper> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const + float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const { if (contextFactor.size() == 0) { @@ -92,29 +92,29 @@ public: */ // joint context for internal LM - std::vector<FactorArrayWrapper> jointContext; + std::vector<const Word*> jointContext; for (size_t currPos = 0 ; currPos < m_nGramOrder ; ++currPos ) { - const FactorArrayWrapper &factorArray = contextFactor[currPos]; + const Word &word = *contextFactor[currPos]; // add word to chunked context std::stringstream stream(""); - const Factor *factor = factorArray[ m_factorTypesOrdered[0] ]; + const Factor *factor = word[ m_factorTypesOrdered[0] ]; stream << factor->GetString(); for (size_t index = 1 ; index < m_factorTypesOrdered.size() ; ++index) { FactorType factorType = m_factorTypesOrdered[index]; - const Factor *factor = factorArray[factorType]; + const Factor *factor = word[factorType]; stream << "|" << factor->GetString(); } factor = m_factorCollection->AddFactor(Output, m_implFactor, stream.str()); - Word jointWord; - jointWord.SetFactor(m_implFactor, factor); + Word* jointWord = new Word; + jointWord->SetFactor(m_implFactor, factor); jointContext.push_back(jointWord); } @@ -125,6 +125,8 @@ public: */ // calc score on chunked phrase float ret = m_lmImpl->GetValue(jointContext, finalState, len); + + RemoveAllInColl(jointContext); return ret; } diff --git a/moses/src/LanguageModelMultiFactor.cpp b/moses/src/LanguageModelMultiFactor.cpp index 8dabb30a5..275e5a0e5 100644 --- a/moses/src/LanguageModelMultiFactor.cpp +++ b/moses/src/LanguageModelMultiFactor.cpp @@ -40,10 +40,10 @@ bool LanguageModelMultiFactor::Useable(const Phrase &phrase) const return false; // whether phrase 
contains all factors in this LM - const FactorArray &factorArray = phrase.GetFactorArray(0); + const Word &word = phrase.GetWord(0); for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; ++currFactor) { - if (m_factorTypes[currFactor] && factorArray[currFactor] == NULL) + if (m_factorTypes[currFactor] && word[currFactor] == NULL) return false; } return true; diff --git a/moses/src/LanguageModelSRI.cpp b/moses/src/LanguageModelSRI.cpp index be59f420a..2d499b771 100755 --- a/moses/src/LanguageModelSRI.cpp +++ b/moses/src/LanguageModelSRI.cpp @@ -132,7 +132,7 @@ float LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const return FloorSRIScore(TransformSRIScore(p)); // log10->log } -float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor, State* finalState, unsigned int *len) const +float LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState, unsigned int *len) const { FactorType factorType = GetFactorType(); size_t count = contextFactor.size(); @@ -146,13 +146,13 @@ float LanguageModelSRI::GetValue(const vector<FactorArrayWrapper> &contextFactor VocabIndex context[MAX_NGRAM_SIZE]; for (size_t i = 0 ; i < count - 1 ; i++) { - context[i] = GetLmID(contextFactor[count-2-i][factorType]); + context[i] = GetLmID((*contextFactor[count-2-i])[factorType]); } context[count-1] = Vocab_None; - assert(contextFactor[count-1][factorType] != NULL); + assert((*contextFactor[count-1])[factorType] != NULL); // call sri lm fn - VocabIndex lmId= GetLmID(contextFactor[count-1][factorType]); + VocabIndex lmId= GetLmID((*contextFactor[count-1])[factorType]); float ret = GetValue(lmId, context); if (finalState) { diff --git a/moses/src/LanguageModelSRI.h b/moses/src/LanguageModelSRI.h index b43e4bc1f..b1a59f899 100755 --- a/moses/src/LanguageModelSRI.h +++ b/moses/src/LanguageModelSRI.h @@ -56,6 +56,6 @@ public: , float weight , size_t nGramOrder); - virtual float GetValue(const std::vector<FactorArrayWrapper> 
&contextFactor, State* finalState = 0, unsigned int* len = 0) const; + virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const; }; diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index 48bdb0d80..013dbdc34 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -9,7 +9,6 @@ libmoses_a_SOURCES_TMP = \ DistortionOrientation.cpp \ DummyScoreProducers.cpp \ Factor.cpp \ - FactorArrayWrapper.cpp \ FactorCollection.cpp \ FactorTypeSet.cpp \ GenerationDictionary.cpp \ diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h index cf27b687a..03bc2cafd 100644 --- a/moses/src/PDTAimp.h +++ b/moses/src/PDTAimp.h @@ -96,7 +96,7 @@ public: } - void Factors2String(FactorArray const& w,std::string& s) const + void Factors2String(Word const& w,std::string& s) const { for(size_t j=0;j<m_input.size();++j) { @@ -155,7 +155,7 @@ public: std::vector<std::string> srcString(src.GetSize()); // convert source Phrase into vector of strings for(size_t i=0;i<srcString.size();++i) - Factors2String(src.GetFactorArray(i),srcString[i]); + Factors2String(src.GetWord(i),srcString[i]); // get target phrases in string representation std::vector<StringTgtCand> cands; @@ -268,9 +268,9 @@ public: for(size_t k=0;k<factorStrings.size();++k) { std::vector<std::string> factors=Tokenize(*factorStrings[k],"|"); - FactorArray& fa=targetPhrase.AddWord(); + Word& w=targetPhrase.AddWord(); for(size_t l=0;l<m_output.size();++l) - fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]); + w[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]); } targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels); targetPhrase.SetSourcePhrase(srcPtr); @@ -360,7 +360,7 @@ public: { const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx std::string s; - Factors2String(w.GetFactorArray(),s); + Factors2String(w,s); bool isEpsilon=(s=="" || 
s==EPSILON); // do not start with epsilon (except at first position) diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp index 5d0c3ee07..fdd54d61d 100755 --- a/moses/src/Phrase.cpp +++ b/moses/src/Phrase.cpp @@ -1,4 +1,5 @@ // $Id$ +// vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder @@ -27,20 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "FactorCollection.h" #include "Phrase.h" #include "Util.h" //malloc() replacement +#include "StaticData.h" // GetMaxNumFactors using namespace std; -std::vector<mempool*> Phrase::s_memPool; +// std::vector<mempool*> Phrase::s_memPool; Phrase::Phrase(const Phrase &copy) :m_direction(copy.m_direction) ,m_phraseSize(copy.m_phraseSize) ,m_arraySize(copy.m_arraySize) -,m_memPoolIndex(copy.m_memPoolIndex) +//,m_memPoolIndex(copy.m_memPoolIndex) +,m_words(copy.m_words) { - assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]); - m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate(); - memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray)); } Phrase& Phrase::operator=(const Phrase& x) @@ -48,19 +48,12 @@ Phrase& Phrase::operator=(const Phrase& x) if(this!=&x) { - if(m_factorArray) - { - assert(m_memPoolIndex<s_memPool.size()); - s_memPool[m_memPoolIndex]->free((char*)m_factorArray); - } - m_direction=x.m_direction; m_phraseSize=x.m_phraseSize; m_arraySize=x.m_arraySize; - m_memPoolIndex=x.m_memPoolIndex; +// m_memPoolIndex=x.m_memPoolIndex; - m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate(); - memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray)); + m_words = x.m_words; } return *this; } @@ -70,55 +63,34 @@ Phrase::Phrase(FactorDirection direction) : m_direction(direction) , m_phraseSize(0) , m_arraySize(ARRAY_SIZE_INCR) - , m_memPoolIndex(0) +// , m_memPoolIndex(0) + , m_words(ARRAY_SIZE_INCR) { - 
assert(m_memPoolIndex<s_memPool.size()); - m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate(); } Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords) :m_direction(direction) ,m_phraseSize(mergeWords.size()) +,m_words(mergeWords.size()) { - m_memPoolIndex = (m_phraseSize + ARRAY_SIZE_INCR - 1) / ARRAY_SIZE_INCR - 1; - m_arraySize = (m_memPoolIndex + 1) * ARRAY_SIZE_INCR; - m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate(); - for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++) { - FactorArray &thisWord = m_factorArray[currPos]; - const Word &mergeWord = *mergeWords[currPos]; - - for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) - { - FactorType factorType = static_cast<FactorType>(currFactor); - thisWord[currFactor] = mergeWord.GetFactor(factorType); - } + m_words[currPos] = *mergeWords[currPos]; } } Phrase::~Phrase() { - // RZ: - // will segFault if Phrase was default constructed and AddWord was never called - // TODO not sure if this is really the intended behaviour - // assertion failure is better than segFault, but if(m_factorArray) might be more appropriate - //assert(m_factorArray); - if(m_factorArray) - { - assert(m_memPoolIndex<s_memPool.size()); - assert((char*)m_factorArray); - s_memPool[m_memPoolIndex]->free((char*)m_factorArray); - } } void Phrase::MergeFactors(const Phrase &copy) { assert(GetSize() == copy.GetSize()); size_t size = GetSize(); + const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection()); for (size_t currPos = 0 ; currPos < size ; currPos++) { - for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) + for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++) { FactorType factorType = static_cast<FactorType>(currFactor); const Factor *factor = copy.GetFactor(currPos, factorType); @@ -153,8 +125,8 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const 
for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++) { - FactorArray &newWord = retPhrase.AddWord(); - Word::Copy(newWord, GetFactorArray(currPos)); + Word &word = retPhrase.AddWord(); + word = GetWord(currPos); } return retPhrase; @@ -166,31 +138,21 @@ std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const stringstream strme; for (size_t pos = 0 ; pos < GetSize() ; pos++) { - strme << Word::ToString(factorsToPrint, GetFactorArray(pos)); + strme << GetWord(pos).ToString(factorsToPrint); } return strme.str(); } -FactorArray &Phrase::AddWord() +Word &Phrase::AddWord() { if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0) { // need to expand array - FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->allocate(); - memcpy(newArray, m_factorArray, m_phraseSize * sizeof(FactorArray)); - s_memPool[m_memPoolIndex]->free((char*)m_factorArray); - - m_memPoolIndex++; m_arraySize += ARRAY_SIZE_INCR; - m_factorArray = newArray; + m_words.resize(m_arraySize); } - FactorArray &factorArray = m_factorArray[m_phraseSize]; - Word::Initialize(factorArray); - - m_phraseSize++; - - return factorArray; + return m_words[m_phraseSize++]; } vector< vector<string> > Phrase::Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter) @@ -233,13 +195,13 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++) { // add word this phrase - FactorArray &factorArray = AddWord(); + Word &word = AddWord(); for (size_t currFactorIndex= 0 ; currFactorIndex < factorOrder.size() ; currFactorIndex++) { FactorType factorType = factorOrder[currFactorIndex]; const string &factorStr = phraseVector[phrasePos][currFactorIndex]; const Factor *factor = factorCollection.AddFactor(m_direction, factorType, factorStr); - factorArray[factorType] = factor; + word[factorType] = factor; } } } @@ 
-270,8 +232,9 @@ bool Phrase::operator < (const Phrase &compare) const { size_t minSize = std::min( thisSize , compareSize ); + const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection()); // taken from word.Compare() - for (size_t i = 0 ; i < MAX_NUM_FACTORS ; i++) + for (size_t i = 0 ; i < maxNumFactors ; i++) { FactorType factorType = static_cast<FactorType>(i); @@ -346,9 +309,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const const size_t size = GetSize(); + const size_t maxNumFactors = StaticData::Instance()->GetMaxNumFactors(this->GetDirection()); for (size_t currPos = 0 ; currPos < size ; currPos++) { - for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) + for (unsigned int currFactor = 0 ; currFactor < maxNumFactors ; currFactor++) { FactorType factorType = static_cast<FactorType>(currFactor); const Factor *thisFactor = GetFactor(currPos, factorType) @@ -389,6 +353,7 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp void Phrase::InitializeMemPool() { +#if 0 s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 )); s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 )); s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 )); @@ -399,15 +364,18 @@ void Phrase::InitializeMemPool() for (size_t i = 8 ; i < 30 ; ++i) s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 )); +#endif } void Phrase::FinalizeMemPool() { +#if 0 std::vector<mempool*>::iterator iter; for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter) { delete *iter; } +#endif } TO_STRING_BODY(Phrase); @@ -418,8 +386,8 @@ ostream& operator<<(ostream& out, const Phrase& phrase) // out << "(size " << phrase.GetSize() << ") "; for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++) { - const FactorArray &factorArray = phrase.GetFactorArray(pos); - out << 
Word::ToString(factorArray); + const Word &word = phrase.GetWord(pos); + out << word; } return out; } diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h index 321741615..4d0986098 100755 --- a/moses/src/Phrase.h +++ b/moses/src/Phrase.h @@ -1,4 +1,5 @@ // $Id$ +// vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder @@ -35,13 +36,13 @@ class Phrase { friend std::ostream& operator<<(std::ostream&, const Phrase&); private: - static std::vector<mempool*> s_memPool; +// static std::vector<mempool*> s_memPool; FactorDirection m_direction; - size_t m_phraseSize, //number of words - m_arraySize, - m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated? - FactorArray *m_factorArray; + size_t m_phraseSize; //number of words + size_t m_arraySize; +// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated? + std::vector<Word> m_words; public: static void InitializeMemPool(); @@ -84,35 +85,37 @@ public: { return m_phraseSize; } - inline const FactorArray &GetFactorArray(size_t pos) const + inline const Word &GetWord(size_t pos) const { - return m_factorArray[pos]; + return m_words[pos]; } - inline FactorArray &GetFactorArray(size_t pos) + inline Word &GetWord(size_t pos) { - return m_factorArray[pos]; + return m_words[pos]; } inline const Factor *GetFactor(size_t pos, FactorType factorType) const { - FactorArray &ptr = m_factorArray[pos]; + const Word &ptr = m_words[pos]; return ptr[factorType]; } inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor) { - FactorArray &ptr = m_factorArray[pos]; + Word &ptr = m_words[pos]; ptr[factorType] = factor; } bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector , const std::vector<FactorType> &inputFactor) const; - FactorArray &AddWord(); + Word &AddWord(); Phrase GetSubString(const WordsRange &wordsRange) const; std::string GetStringRep(const 
std::vector<FactorType> factorsToPrint) const; - void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());} + void push_back(Word const& w) { + AddWord() = w; + } TO_STRING; diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp index c55aafac6..3f4cfbf28 100755 --- a/moses/src/PhraseDictionary.cpp +++ b/moses/src/PhraseDictionary.cpp @@ -122,7 +122,7 @@ TargetPhraseCollection *PhraseDictionary::CreateTargetPhraseCollection(const Phr PhraseDictionaryNode *currNode = &m_collection; for (size_t pos = 0 ; pos < size ; ++pos) { - Word word(source.GetFactorArray(pos)); + const Word& word = source.GetWord(pos); currNode = currNode->GetOrCreateChild(word); if (currNode == NULL) return NULL; @@ -144,7 +144,7 @@ const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(const const PhraseDictionaryNode *currNode = &m_collection; for (size_t pos = 0 ; pos < size ; ++pos) { - Word word(source.GetFactorArray(pos)); + const Word& word = source.GetWord(pos); currNode = currNode->GetChild(word); if (currNode == NULL) return NULL; diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h index 38af542d7..b33d700cb 100755 --- a/moses/src/Sentence.h +++ b/moses/src/Sentence.h @@ -50,9 +50,9 @@ class Sentence : public Phrase, public InputType { return Phrase::GetStringRep(factorsToPrint); } - const FactorArray& GetFactorArray(size_t pos) const + const Word& GetWord(size_t pos) const { - return Phrase::GetFactorArray(pos); + return Phrase::GetWord(pos); } size_t GetSize() const { diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index 9edbab89f..813c6aef2 100755 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -1,4 +1,5 @@ // $Id$ +// vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder @@ -42,6 +43,22 @@ using namespace std; extern Timer timer; +static size_t CalcMax(size_t x, const vector<size_t>& y) { + size_t max = x; + 
for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i) + if (*i > max) max = *i; + return max; +} + +static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z) { + size_t max = x; + for (vector<size_t>::const_iterator i=y.begin(); i != y.end(); ++i) + if (*i > max) max = *i; + for (vector<size_t>::const_iterator i=z.begin(); i != z.end(); ++i) + if (*i > max) max = *i; + return max; +} + StaticData* StaticData::s_instance(0); StaticData::StaticData() @@ -57,6 +74,9 @@ StaticData::StaticData() ,m_computeLMBackoffStats(false) ,m_factorDelimiter("|") // default delimiter between factors { + m_maxFactorIdx[0] = 0; // source side + m_maxFactorIdx[1] = 0; // target side + s_instance = this; // memory pools @@ -300,9 +320,6 @@ bool StaticData::LoadParameters(int argc, char* argv[]) // initialize n-gram order for each factor. populated only by factored lm - for(size_t i=0; i < MAX_NUM_FACTORS ; i++) - m_maxNgramOrderForFactor[i] = 0; - const vector<string> &lmVector = m_parameter.GetParam("lmodel-file"); for(size_t i=0; i<lmVector.size(); i++) @@ -360,6 +377,7 @@ bool StaticData::LoadParameters(int argc, char* argv[]) bool oldFormat = (token.size() == 3); vector<FactorType> input = Tokenize<FactorType>(token[0], ",") ,output = Tokenize<FactorType>(token[1], ","); + m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output); string filePath; size_t numFeatures = 1; if (oldFormat) @@ -536,6 +554,9 @@ void StaticData::LoadPhraseTables(bool filter //characteristics of the phrase table vector<FactorType> input = Tokenize<FactorType>(token[0], ",") ,output = Tokenize<FactorType>(token[1], ","); + m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input); + m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output); + m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1; string filePath= token[3]; size_t noScoreComponent = Scan<size_t>(token[2]); // weights for this phrase dictionary diff --git a/moses/src/StaticData.h 
b/moses/src/StaticData.h index bcd59ca11..38cf1a525 100755 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -80,7 +80,6 @@ protected: std::vector<std::string> m_mySQLParam; InputOutput *m_inputOutput; bool m_fLMsLoaded, m_labeledNBestList; - size_t m_maxNgramOrderForFactor[MAX_NUM_FACTORS]; /*** * false = treat unknown words as unknowns, and translate them as themselves; * true = drop (ignore) them @@ -88,6 +87,7 @@ protected: bool m_dropUnknown; bool m_wordDeletionEnabled; + int m_inputType; unsigned m_numInputScores; @@ -102,7 +102,9 @@ protected: bool m_computeLMBackoffStats; mutable std::auto_ptr<SentenceStats> m_sentenceStats; - std::string m_factorDelimiter; + std::string m_factorDelimiter; //! by default, |, but it can be changed + size_t m_maxFactorIdx[2]; //! number of factors on source and target side + size_t m_maxNumFactors; //! max number of factors on both source and target sides public: StaticData(); @@ -299,4 +301,6 @@ public: bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;} bool OnlyDistinctNBest() const {return m_onlyDistinctNBest;} const std::string& GetFactorDelimiter() const {return m_factorDelimiter;} + size_t GetMaxNumFactors(FactorDirection direction) const { return m_maxFactorIdx[(size_t)direction]+1; } + size_t GetMaxNumFactors() const { return m_maxNumFactors; } }; diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp index 06fadff9b..ff1e2604d 100644 --- a/moses/src/TargetPhrase.cpp +++ b/moses/src/TargetPhrase.cpp @@ -114,9 +114,9 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const const size_t len = GetSize(); for (size_t currPos = 0 ; currPos < len ; currPos++) { - const FactorArray &inputWord = inputPhrase.GetFactorArray(currPos); - FactorArray &cloneWord = clone->GetFactorArray(currPos); - Word::Merge(cloneWord, inputWord); + const Word &inputWord = inputPhrase.GetWord(currPos); + Word &cloneWord = clone->GetWord(currPos); + cloneWord.Merge(inputWord); 
currWord++; } diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp index 64c971f5f..8a1bd9508 100644 --- a/moses/src/TranslationOptionCollection.cpp +++ b/moses/src/TranslationOptionCollection.cpp @@ -332,7 +332,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( /** special handling of unknown words: add special translation (or drop) */ -void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourceWord, +void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos , FactorCollection &factorCollection) { @@ -356,7 +356,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc { // add to dictionary TargetPhrase targetPhrase(Output); - FactorArray &targetWord = targetPhrase.AddWord(); + Word &targetWord = targetPhrase.AddWord(); for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) { diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h index d8f9ef4db..141e8d6cf 100755 --- a/moses/src/TranslationOptionCollection.h +++ b/moses/src/TranslationOptionCollection.h @@ -36,6 +36,7 @@ class GenerationDictionary; class InputType; class LMList; class FactorMask; +class Word; typedef std::vector<const TranslationOption*> TranslationOptionList; @@ -70,7 +71,7 @@ protected: , size_t startPos, size_t endPos, bool observeTableLimit ); void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection); - virtual void ProcessOneUnknownWord(const FactorArray &sourceWord + virtual void ProcessOneUnknownWord(const Word &sourceWord , size_t sourcePos , FactorCollection &factorCollection); diff --git a/moses/src/TranslationOptionCollectionConfusionNet.cpp b/moses/src/TranslationOptionCollectionConfusionNet.cpp index d1e506935..035212d84 100644 --- a/moses/src/TranslationOptionCollectionConfusionNet.cpp +++ 
b/moses/src/TranslationOptionCollectionConfusionNet.cpp @@ -19,7 +19,7 @@ ProcessUnknownWord( size_t sourcePos ConfusionNet::Column const& coll=source.GetColumn(sourcePos); for(ConfusionNet::Column::const_iterator i=coll.begin();i!=coll.end();++i) - ProcessOneUnknownWord(i->first.GetFactorArray(),sourcePos,factorCollection); + ProcessOneUnknownWord(i->first,sourcePos,factorCollection); } diff --git a/moses/src/TranslationOptionCollectionText.cpp b/moses/src/TranslationOptionCollectionText.cpp index b456239a2..037c451e2 100644 --- a/moses/src/TranslationOptionCollectionText.cpp +++ b/moses/src/TranslationOptionCollectionText.cpp @@ -36,6 +36,6 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos , FactorCollection &factorCollection) { - const FactorArray &sourceWord = m_source.GetFactorArray(sourcePos); + const Word &sourceWord = m_source.GetWord(sourcePos); ProcessOneUnknownWord(sourceWord,sourcePos,factorCollection); } diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h index a896ded97..b67724369 100755 --- a/moses/src/TypeDef.h +++ b/moses/src/TypeDef.h @@ -81,8 +81,8 @@ const size_t MAX_NUM_FACTORS = 4; enum FactorDirection { - Input, - Output + Input, //! Source factors + Output //! 
Target factors }; enum DecodeType @@ -144,5 +144,3 @@ enum DictionaryFind // typedef typedef size_t FactorType; -class Factor; -typedef const Factor * FactorArray[MAX_NUM_FACTORS]; diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp index d28542fe7..5c8384316 100755 --- a/moses/src/Word.cpp +++ b/moses/src/Word.cpp @@ -1,4 +1,5 @@ // $Id$ +// vim::tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder @@ -28,25 +29,23 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA using namespace std; Word::Word(const Word &copy) -:FactorArrayWrapper() +#ifdef DYNAMIC_FACTOR_ARRAY +: m_factorArray(copy.m_factorArray) +#endif { // deep copy - m_factorArrayPtr = &m_factorArray; - Word::Copy(m_factorArray, copy.m_factorArray); +#ifndef DYNAMIC_FACTOR_ARRAY + memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray)); +#endif } Word::Word() +#ifdef DYNAMIC_FACTOR_ARRAY +: m_factorArray(MAX_NUM_FACTORS, 0) +#endif { - m_factorArrayPtr = &m_factorArray; - Word::Initialize(m_factorArray); -} - -Word::Word(const FactorArray &factorArray) -{ - m_factorArrayPtr = &m_factorArray; - for (size_t factor = 0 ; factor < MAX_NUM_FACTORS ; factor++) - { - m_factorArray[factor] = factorArray[factor]; - } +#ifndef DYNAMIC_FACTOR_ARRAY + memset(m_factorArray, 0, sizeof(FactorArray)); +#endif } Word::~Word() @@ -54,7 +53,7 @@ Word::~Word() } // static -int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord) +int Word::Compare(const Word &targetWord, const Word &sourceWord) { for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++) { @@ -73,51 +72,20 @@ int Word::Compare(const FactorArray &targetWord, const FactorArray &sourceWord) } -void Word::Copy(FactorArray &target, const FactorArray &source) -{ - memcpy(target, source, sizeof(FactorArray)); -} - -void Word::Initialize(FactorArray &factorArray) -{ - memset(factorArray, 0, sizeof(FactorArray)); -} - 
-void Word::Merge(FactorArray &targetWord, const FactorArray &sourceWord) +void Word::Merge(const Word &sourceWord) { for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) { - const Factor *sourcefactor = sourceWord[currFactor] - ,*targetFactor = targetWord[currFactor]; + const Factor *sourcefactor = sourceWord.m_factorArray[currFactor] + ,*targetFactor = this ->m_factorArray[currFactor]; if (targetFactor == NULL && sourcefactor != NULL) { - targetWord[currFactor] = sourcefactor; - } - } -} - -std::string Word::ToString(const FactorArray &factorArray) -{ - stringstream strme; - - const std::string& factorDelimiter = StaticData::Instance()->GetFactorDelimiter(); - bool firstPass = true; - // TODO- don't loop over MAX_NUM_FACTORS here, just use the ones that - // actually participate in the xltn process. - for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) - { - const Factor *factor = factorArray[currFactor]; - if (factor != NULL) - { - if (firstPass) { firstPass = false; } else { strme << factorDelimiter; } - strme << *factor; + m_factorArray[currFactor] = sourcefactor; } } - strme << " "; - return strme.str(); } -std::string Word::ToString(const vector<FactorType> factorType, const FactorArray &factorArray) +std::string Word::ToString(const vector<FactorType> factorType) const { stringstream strme; assert(factorType.size() <= MAX_NUM_FACTORS); @@ -125,7 +93,7 @@ std::string Word::ToString(const vector<FactorType> factorType, const FactorArra bool firstPass = true; for (unsigned int i = 0 ; i < factorType.size() ; i++) { - const Factor *factor = factorArray[factorType[i]]; + const Factor *factor = m_factorArray[factorType[i]]; if (factor != NULL) { if (firstPass) { firstPass = false; } else { strme << factorDelimiter; } diff --git a/moses/src/Word.h b/moses/src/Word.h index 0bfb0c106..5cbf2bbcf 100755 --- a/moses/src/Word.h +++ b/moses/src/Word.h @@ -27,59 +27,68 @@ Foundation, Inc., 51 Franklin Street, Fifth 
Floor, Boston, MA 02110-1301 USA #include "TypeDef.h" #include "Factor.h" #include "Util.h" -#include "FactorArrayWrapper.h" + +#undef DYNAMIC_FACTOR_ARRAY class Phrase; /*** * hold a set of factors for a single word - * - * TODO either replace all uses of FactorArray with Word or vice versa; don't only use the wrapper in half of cases! */ -class Word : public FactorArrayWrapper +class Word { friend std::ostream& operator<<(std::ostream&, const Word&); protected: - FactorArray m_factorArray; + +#ifndef DYNAMIC_FACTOR_ARRAY + typedef const Factor * FactorArray[MAX_NUM_FACTORS]; +#else + typedef std::vector<const Factor*> FactorArray; +#endif + + FactorArray m_factorArray; public: /** * deep copy */ Word(const Word &copy); - Word(const FactorArray &factorArray); Word(); ~Word(); + const Factor*& operator[](FactorType index) { + return m_factorArray[index]; + } - // why is this needed ? it should be inherited - const FactorArray &GetFactorArray() const - { - return m_factorArray; + const Factor * const & operator[](FactorType index) const { + return m_factorArray[index]; } - inline FactorArray &GetFactorArray() - { - return m_factorArray; + inline const Factor* GetFactor(FactorType factorType) const { + return m_factorArray[factorType]; } inline void SetFactor(FactorType factorType, const Factor *factor) { m_factorArray[factorType] = factor; } + void Merge(const Word &sourceWord); + + std::string ToString(const std::vector<FactorType> factorType) const; TO_STRING; /* static functions */ - // FactorArray - static void Copy(FactorArray &target, const FactorArray &source); - static void Initialize(FactorArray &factorArray); - /*** * wherever the source word has a given factor that the target word is missing, add it to the target word */ - static void Merge(FactorArray &targetWord, const FactorArray &sourceWord); - static std::string ToString(const FactorArray &factorArray); - static std::string ToString(const std::vector<FactorType> factorType, const FactorArray 
&factorArray); - static int Compare(const FactorArray &targetWord, const FactorArray &sourceWord); + static int Compare(const Word &targetWord, const Word &sourceWord); + + inline bool operator< (const Word &compare) const + { // needed to store word in GenerationDictionary map + // uses comparison of FactorKey + // 'proper' comparison, not address/id comparison + return Compare(*this, compare) < 0; + } + }; |