diff options
author | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-10-05 22:50:21 +0400 |
---|---|---|
committer | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-10-05 22:50:21 +0400 |
commit | 7a1414e4c13d99848ed5db90489d0668810531f0 (patch) | |
tree | a9598a8991a14615a4571b72fcd7c4a3ef1bf060 /moses | |
parent | d9e67b804be0a5c6af484252077fd4e4ca28438a (diff) |
added comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@861 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- | moses/src/PDTAimp.h | 2 | ||||
-rwxr-xr-x | moses/src/Phrase.cpp | 45 | ||||
-rwxr-xr-x | moses/src/Phrase.h | 69 | ||||
-rwxr-xr-x | moses/src/TranslationOption.cpp | 4 | ||||
-rwxr-xr-x | moses/src/TranslationOption.h | 4 |
5 files changed, 71 insertions, 53 deletions
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h index 2e0d9af0d..adac3b9e4 100644 --- a/moses/src/PDTAimp.h +++ b/moses/src/PDTAimp.h @@ -375,7 +375,7 @@ public: Range newRange(curr.begin(),curr.end()+1); float newScore=curr.GetScore()+currCol[colidx].second; // CN score Phrase newSrc(curr.src); - if(!isEpsilon) newSrc.push_back(w); + if(!isEpsilon) newSrc.AddWord(w); if(newRange.second<srcSize && newScore>LOWEST_SCORE) { // if there is more room to grow, add a new state onto the queue diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp index fdd54d61d..3050e79f4 100755 --- a/moses/src/Phrase.cpp +++ b/moses/src/Phrase.cpp @@ -27,18 +27,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "memory.h" #include "FactorCollection.h" #include "Phrase.h" -#include "Util.h" //malloc() replacement #include "StaticData.h" // GetMaxNumFactors using namespace std; -// std::vector<mempool*> Phrase::s_memPool; - Phrase::Phrase(const Phrase &copy) :m_direction(copy.m_direction) ,m_phraseSize(copy.m_phraseSize) ,m_arraySize(copy.m_arraySize) -//,m_memPoolIndex(copy.m_memPoolIndex) ,m_words(copy.m_words) { } @@ -46,15 +42,13 @@ Phrase::Phrase(const Phrase &copy) Phrase& Phrase::operator=(const Phrase& x) { if(this!=&x) - { - - m_direction=x.m_direction; - m_phraseSize=x.m_phraseSize; - m_arraySize=x.m_arraySize; -// m_memPoolIndex=x.m_memPoolIndex; + { + m_direction=x.m_direction; + m_phraseSize=x.m_phraseSize; + m_arraySize=x.m_arraySize; - m_words = x.m_words; - } + m_words = x.m_words; + } return *this; } @@ -63,19 +57,17 @@ Phrase::Phrase(FactorDirection direction) : m_direction(direction) , m_phraseSize(0) , m_arraySize(ARRAY_SIZE_INCR) -// , m_memPoolIndex(0) , m_words(ARRAY_SIZE_INCR) { } Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords) :m_direction(direction) -,m_phraseSize(mergeWords.size()) ,m_words(mergeWords.size()) { - for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++) + for 
(size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++) { - m_words[currPos] = *mergeWords[currPos]; + AddWord(*mergeWords[currPos]); } } @@ -209,7 +201,7 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder , const string &phraseString , FactorCollection &factorCollection - , const string &factorDelimiter) + , const string &factorDelimiter) { vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter); CreateFromString(factorOrder, phraseVector, factorCollection); @@ -353,29 +345,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp void Phrase::InitializeMemPool() { -#if 0 - s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 )); - s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 )); - s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 )); - s_memPool.push_back( new mempool(4 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 100 )); - s_memPool.push_back( new mempool(5 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 )); - s_memPool.push_back( new mempool(6 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 )); - s_memPool.push_back( new mempool(7 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 )); - - for (size_t i = 8 ; i < 30 ; ++i) - s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 )); -#endif } void Phrase::FinalizeMemPool() { -#if 0 - std::vector<mempool*>::iterator iter; - for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter) - { - delete *iter; - } -#endif } TO_STRING_BODY(Phrase); diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h index 4d0986098..b859df1ca 100755 --- a/moses/src/Phrase.h +++ b/moses/src/Phrase.h @@ -36,55 +36,91 @@ class Phrase { friend std::ostream& operator<<(std::ostream&, const Phrase&); private: -// static std::vector<mempool*> s_memPool; - 
FactorDirection m_direction; + FactorDirection m_direction; /** Reusing Direction enum to really mean which language + Input = Source, Output = Target. + Not really used, but nice to know for debugging purposes + */ size_t m_phraseSize; //number of words - size_t m_arraySize; -// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated? + size_t m_arraySize; /** current size of vector m_words. This number is equal or bigger + than m_phraseSize. Used for faster allocation of m_word */ std::vector<Word> m_words; public: + /** No longer does anything as not using mem pool for Phrase class anymore */ static void InitializeMemPool(); static void FinalizeMemPool(); + /** copy constructor */ Phrase(const Phrase &copy); Phrase& operator=(const Phrase&); + /** create empty phrase + * \param direction = language (Input = Source, Output = Target) + */ Phrase(FactorDirection direction); + /** create phrase from vectors of words */ Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords); + /** destructor */ virtual ~Phrase(); - static std::vector< std::vector<std::string> > Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter); + /** parse a string from phrase table or sentence input and create a 2D vector of strings + * \param phraseString string to parse + * \param factorOrder factors in the parse string. This argument is not fully used, only as a check to make ensure + * number of factors is what was promised + * \param factorDelimiter what char to use to separate factor strings from each other. Usually use '|'. 
Can be multi-char + */ + static std::vector< std::vector<std::string> > Parse( + const std::string &phraseString + , const std::vector<FactorType> &factorOrder + , const std::string& factorDelimiter); + /** Fills phrase with words from 2D string vector + * \param factorOrder factor types of each element in 2D string vector + * \param phraseVector 2D string vector + */ void CreateFromString(const std::vector<FactorType> &factorOrder , const std::vector< std::vector<std::string> > &phraseVector , FactorCollection &factorCollection); + /** Fills phrase with words from format string, typically from phrase table or sentence input + * \param factorOrder factor types of each element in 2D string vector + * \param phraseString formatted input string to parse + * \param factorDelimiter delimiter, as used by Parse() + */ void CreateFromString(const std::vector<FactorType> &factorOrder , const std::string &phraseString , FactorCollection &factorCollection , const std::string &factorDelimiter); + /** copy factors from the other phrase to this phrase. + IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten + */ void MergeFactors(const Phrase &copy); //! copy a single factor (specified by factorType) void MergeFactors(const Phrase &copy, FactorType factorType); //! copy all factors specified in factorVec and none others void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec); - // must run IsCompatible() to ensure incompatible factors aren't being overwritten + /** compare 2 phrases to ensure no factors are lost if the phrases are merged + * must run IsCompatible() to ensure incompatible factors aren't being overwritten + */ bool IsCompatible(const Phrase &inputPhrase) const; bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const; bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const; - + + //! really means what language. 
Input = Source, Output = Target inline FactorDirection GetDirection() const { return m_direction; } + //! number of words inline size_t GetSize() const { return m_phraseSize; } + + //! word at a particular position inline const Word &GetWord(size_t pos) const { return m_words[pos]; @@ -93,6 +129,7 @@ public: { return m_words[pos]; } + //! particular factor at a particular position inline const Factor *GetFactor(size_t pos, FactorType factorType) const { const Word &ptr = m_words[pos]; @@ -104,22 +141,28 @@ public: ptr[factorType] = factor; } + //! whether the 2D vector is a substring of this phrase bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector , const std::vector<FactorType> &inputFactor) const; + //! create an empty word at the end of the phrase Word &AddWord(); - + //! create copy of input word at the end of the phrase + void AddWord(const Word &newWord) + { + AddWord() = newWord; + } + //! create new phrase class that is a substring of this phrase Phrase GetSubString(const WordsRange &wordsRange) const; + //! return a string rep of the phrase. 
Each factor is separated by the factor delimiter as specified in StaticData class std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; - void push_back(Word const& w) { - AddWord() = w; - } - TO_STRING; - // used to insert & find phrase in dictionary + /** transitive comparison between 2 phrases + * used to insert & find phrase in dictionary + */ bool operator< (const Phrase &compare) const; }; diff --git a/moses/src/TranslationOption.cpp b/moses/src/TranslationOption.cpp index 0ca55411f..7fbb73849 100755 --- a/moses/src/TranslationOption.cpp +++ b/moses/src/TranslationOption.cpp @@ -33,13 +33,11 @@ using namespace std; TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase) : m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase()) ,m_sourceWordsRange (wordsRange) -{ // used by initial translation step - +{ // set score m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown()); } -// used to create trans opt from unknown word //TODO this should be a factory function! TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/) : m_targetPhrase(targetPhrase) diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h index d119500f9..7a1971d4d 100755 --- a/moses/src/TranslationOption.h +++ b/moses/src/TranslationOption.h @@ -70,7 +70,9 @@ protected: ScoreComponentCollection2 m_scoreBreakdown; public: + /** constructor. Used by initial translation step */ TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase); + /** constructor. 
Used to create trans opt from unknown word */ TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int); /** used by initial translation step */ @@ -100,6 +102,7 @@ public: return m_sourcePhrase; } + /** whether source span overlaps with those of a hypothesis */ bool Overlap(const Hypothesis &hypothesis) const; /** return start index of source phrase */ @@ -143,6 +146,7 @@ public: return m_scoreBreakdown; } + /** Calculate future score and n-gram score of this trans option, plus the score breakdowns */ void CalcScore(); TO_STRING; |