Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorhieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>2006-10-05 22:50:21 +0400
committerhieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>2006-10-05 22:50:21 +0400
commit7a1414e4c13d99848ed5db90489d0668810531f0 (patch)
treea9598a8991a14615a4571b72fcd7c4a3ef1bf060 /moses
parentd9e67b804be0a5c6af484252077fd4e4ca28438a (diff)
added comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@861 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r--moses/src/PDTAimp.h2
-rwxr-xr-xmoses/src/Phrase.cpp45
-rwxr-xr-xmoses/src/Phrase.h69
-rwxr-xr-xmoses/src/TranslationOption.cpp4
-rwxr-xr-xmoses/src/TranslationOption.h4
5 files changed, 71 insertions, 53 deletions
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h
index 2e0d9af0d..adac3b9e4 100644
--- a/moses/src/PDTAimp.h
+++ b/moses/src/PDTAimp.h
@@ -375,7 +375,7 @@ public:
Range newRange(curr.begin(),curr.end()+1);
float newScore=curr.GetScore()+currCol[colidx].second; // CN score
Phrase newSrc(curr.src);
- if(!isEpsilon) newSrc.push_back(w);
+ if(!isEpsilon) newSrc.AddWord(w);
if(newRange.second<srcSize && newScore>LOWEST_SCORE)
{
// if there is more room to grow, add a new state onto the queue
diff --git a/moses/src/Phrase.cpp b/moses/src/Phrase.cpp
index fdd54d61d..3050e79f4 100755
--- a/moses/src/Phrase.cpp
+++ b/moses/src/Phrase.cpp
@@ -27,18 +27,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "memory.h"
#include "FactorCollection.h"
#include "Phrase.h"
-#include "Util.h" //malloc() replacement
#include "StaticData.h" // GetMaxNumFactors
using namespace std;
-// std::vector<mempool*> Phrase::s_memPool;
-
Phrase::Phrase(const Phrase &copy)
:m_direction(copy.m_direction)
,m_phraseSize(copy.m_phraseSize)
,m_arraySize(copy.m_arraySize)
-//,m_memPoolIndex(copy.m_memPoolIndex)
,m_words(copy.m_words)
{
}
@@ -46,15 +42,13 @@ Phrase::Phrase(const Phrase &copy)
Phrase& Phrase::operator=(const Phrase& x)
{
if(this!=&x)
- {
-
- m_direction=x.m_direction;
- m_phraseSize=x.m_phraseSize;
- m_arraySize=x.m_arraySize;
-// m_memPoolIndex=x.m_memPoolIndex;
+ {
+ m_direction=x.m_direction;
+ m_phraseSize=x.m_phraseSize;
+ m_arraySize=x.m_arraySize;
- m_words = x.m_words;
- }
+ m_words = x.m_words;
+ }
return *this;
}
@@ -63,19 +57,17 @@ Phrase::Phrase(FactorDirection direction)
: m_direction(direction)
, m_phraseSize(0)
, m_arraySize(ARRAY_SIZE_INCR)
-// , m_memPoolIndex(0)
, m_words(ARRAY_SIZE_INCR)
{
}
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
:m_direction(direction)
-,m_phraseSize(mergeWords.size())
,m_words(mergeWords.size())
{
- for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
+ for (size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++)
{
- m_words[currPos] = *mergeWords[currPos];
+ AddWord(*mergeWords[currPos]);
}
}
@@ -209,7 +201,7 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
, const string &phraseString
, FactorCollection &factorCollection
- , const string &factorDelimiter)
+ , const string &factorDelimiter)
{
vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
CreateFromString(factorOrder, phraseVector, factorCollection);
@@ -353,29 +345,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
void Phrase::InitializeMemPool()
{
-#if 0
- s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
- s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
- s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
- s_memPool.push_back( new mempool(4 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 100 ));
- s_memPool.push_back( new mempool(5 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
- s_memPool.push_back( new mempool(6 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
- s_memPool.push_back( new mempool(7 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
-
- for (size_t i = 8 ; i < 30 ; ++i)
- s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
-#endif
}
void Phrase::FinalizeMemPool()
{
-#if 0
- std::vector<mempool*>::iterator iter;
- for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
- {
- delete *iter;
- }
-#endif
}
TO_STRING_BODY(Phrase);
diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h
index 4d0986098..b859df1ca 100755
--- a/moses/src/Phrase.h
+++ b/moses/src/Phrase.h
@@ -36,55 +36,91 @@ class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
private:
-// static std::vector<mempool*> s_memPool;
- FactorDirection m_direction;
+ FactorDirection m_direction; /** Reusing Direction enum to really mean which language
+ Input = Source, Output = Target.
+ Not really used, but nice to know for debugging purposes
+ */
size_t m_phraseSize; //number of words
- size_t m_arraySize;
-// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
+ size_t m_arraySize; /** current size of vector m_words. This number is equal to or bigger
+ than m_phraseSize. Used for faster allocation of m_words */
std::vector<Word> m_words;
public:
+ /** No longer does anything as not using mem pool for Phrase class anymore */
static void InitializeMemPool();
static void FinalizeMemPool();
+ /** copy constructor */
Phrase(const Phrase &copy);
Phrase& operator=(const Phrase&);
+ /** create empty phrase
+ * \param direction = language (Input = Source, Output = Target)
+ */
Phrase(FactorDirection direction);
+ /** create phrase from a vector of words */
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
+ /** destructor */
virtual ~Phrase();
- static std::vector< std::vector<std::string> > Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter);
+ /** parse a string from phrase table or sentence input and create a 2D vector of strings
+ * \param phraseString string to parse
+ * \param factorOrder factors in the parse string. This argument is not fully used, only as a check to ensure
+ * the number of factors is what was promised
+ * \param factorDelimiter what char to use to separate factor strings from each other. Usually use '|'. Can be multi-char
+ */
+ static std::vector< std::vector<std::string> > Parse(
+ const std::string &phraseString
+ , const std::vector<FactorType> &factorOrder
+ , const std::string& factorDelimiter);
+ /** Fills phrase with words from 2D string vector
+ * \param factorOrder factor types of each element in 2D string vector
+ * \param phraseVector 2D string vector
+ */
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::vector< std::vector<std::string> > &phraseVector
, FactorCollection &factorCollection);
+ /** Fills phrase with words from format string, typically from phrase table or sentence input
+ * \param factorOrder factor types of each element in 2D string vector
+ * \param phraseString formatted input string to parse
+ * \param factorDelimiter delimiter, as used by Parse()
+ */
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString
, FactorCollection &factorCollection
, const std::string &factorDelimiter);
+ /** copy factors from the other phrase to this phrase.
+ IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
+ */
void MergeFactors(const Phrase &copy);
//! copy a single factor (specified by factorType)
void MergeFactors(const Phrase &copy, FactorType factorType);
//! copy all factors specified in factorVec and none others
void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec);
- // must run IsCompatible() to ensure incompatible factors aren't being overwritten
+ /** compare 2 phrases to ensure no factors are lost if the phrases are merged
+ * must be run before MergeFactors() to ensure incompatible factors aren't being overwritten
+ */
bool IsCompatible(const Phrase &inputPhrase) const;
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
-
+
+ //! really means what language. Input = Source, Output = Target
inline FactorDirection GetDirection() const
{
return m_direction;
}
+ //! number of words
inline size_t GetSize() const
{
return m_phraseSize;
}
+
+ //! word at a particular position
inline const Word &GetWord(size_t pos) const
{
return m_words[pos];
@@ -93,6 +129,7 @@ public:
{
return m_words[pos];
}
+ //! particular factor at a particular position
inline const Factor *GetFactor(size_t pos, FactorType factorType) const
{
const Word &ptr = m_words[pos];
@@ -104,22 +141,28 @@ public:
ptr[factorType] = factor;
}
+ //! whether the 2D vector is a substring of this phrase
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
, const std::vector<FactorType> &inputFactor) const;
+ //! create an empty word at the end of the phrase
Word &AddWord();
-
+ //! create copy of input word at the end of the phrase
+ void AddWord(const Word &newWord)
+ {
+ AddWord() = newWord;
+ }
+ //! create new phrase class that is a substring of this phrase
Phrase GetSubString(const WordsRange &wordsRange) const;
+ //! return a string rep of the phrase. Each factor is separated by the factor delimiter as specified in StaticData class
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
- void push_back(Word const& w) {
- AddWord() = w;
- }
-
TO_STRING;
- // used to insert & find phrase in dictionary
+ /** transitive comparison between 2 phrases
+ * used to insert & find phrase in dictionary
+ */
bool operator< (const Phrase &compare) const;
};
diff --git a/moses/src/TranslationOption.cpp b/moses/src/TranslationOption.cpp
index 0ca55411f..7fbb73849 100755
--- a/moses/src/TranslationOption.cpp
+++ b/moses/src/TranslationOption.cpp
@@ -33,13 +33,11 @@ using namespace std;
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
,m_sourceWordsRange (wordsRange)
-{ // used by initial translation step
-
+{
// set score
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
}
-// used to create trans opt from unknown word
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
: m_targetPhrase(targetPhrase)
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
index d119500f9..7a1971d4d 100755
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@@ -70,7 +70,9 @@ protected:
ScoreComponentCollection2 m_scoreBreakdown;
public:
+ /** constructor. Used by initial translation step */
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase);
+ /** constructor. Used to create trans opt from unknown word */
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int);
/** used by initial translation step */
@@ -100,6 +102,7 @@ public:
return m_sourcePhrase;
}
+ /** whether source span overlaps with those of a hypothesis */
bool Overlap(const Hypothesis &hypothesis) const;
/** return start index of source phrase */
@@ -143,6 +146,7 @@ public:
return m_scoreBreakdown;
}
+ /** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
void CalcScore();
TO_STRING;