Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorhieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>2006-10-05 13:45:49 +0400
committerhieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230>2006-10-05 13:45:49 +0400
commitefd3ada355fe31a24b0f59d7c7431071df606e69 (patch)
treeb3526a057542db99966ebe91de79e9e7d1a6eb23 /moses
parente5231db5477d226c34db68a15d79b74fcde306f5 (diff)
added comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@857 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r--moses/src/DecodeStep.h2
-rw-r--r--moses/src/DecodeStepGeneration.cpp2
-rw-r--r--moses/src/DecodeStepGeneration.h2
-rw-r--r--moses/src/DecodeStepTranslation.cpp4
-rw-r--r--moses/src/DecodeStepTranslation.h2
-rw-r--r--moses/src/TranslationOptionCollection.cpp163
-rwxr-xr-xmoses/src/TranslationOptionCollection.h26
7 files changed, 118 insertions, 83 deletions
diff --git a/moses/src/DecodeStep.h b/moses/src/DecodeStep.h
index d31e3cb54..b6dae8c1b 100644
--- a/moses/src/DecodeStep.h
+++ b/moses/src/DecodeStep.h
@@ -102,6 +102,6 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
- , bool observeTableLimit) const = 0;
+ , bool adhereTableLimit) const = 0;
};
diff --git a/moses/src/DecodeStepGeneration.cpp b/moses/src/DecodeStepGeneration.cpp
index 998e6dc98..0677424a2 100644
--- a/moses/src/DecodeStepGeneration.cpp
+++ b/moses/src/DecodeStepGeneration.cpp
@@ -79,7 +79,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
- , bool observeTableLimit) const
+ , bool adhereTableLimit) const
{
//TRACE_ERR(inputPartialTranslOpt << endl);
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
diff --git a/moses/src/DecodeStepGeneration.h b/moses/src/DecodeStepGeneration.h
index 66adc5f8f..f4ab90761 100644
--- a/moses/src/DecodeStepGeneration.h
+++ b/moses/src/DecodeStepGeneration.h
@@ -40,7 +40,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
- , bool observeTableLimit) const;
+ , bool adhereTableLimit) const;
private:
TranslationOption *MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
diff --git a/moses/src/DecodeStepTranslation.cpp b/moses/src/DecodeStepTranslation.cpp
index b0517dde0..1095f0024 100644
--- a/moses/src/DecodeStepTranslation.cpp
+++ b/moses/src/DecodeStepTranslation.cpp
@@ -53,7 +53,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
- , bool observeTableLimit) const
+ , bool adhereTableLimit) const
{
//TRACE_ERR(inputPartialTranslOpt << endl);
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
@@ -75,7 +75,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
if (phraseColl != NULL)
{
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
- iterEnd = (!observeTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase)
{
diff --git a/moses/src/DecodeStepTranslation.h b/moses/src/DecodeStepTranslation.h
index 86c1e250b..218892ddc 100644
--- a/moses/src/DecodeStepTranslation.h
+++ b/moses/src/DecodeStepTranslation.h
@@ -39,7 +39,7 @@ public:
, PartialTranslOptColl &outputPartialTranslOptColl
, FactorCollection &factorCollection
, TranslationOptionCollection *toc
- , bool observeTableLimit) const;
+ , bool adhereTableLimit) const;
private:
TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
};
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 8a1bd9508..80edc0afd 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -33,7 +33,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-/** constructor; since translation options are indexed by coverage span, the corresponding data structure is initialized here */
+/** constructor; since translation options are indexed by coverage span, the corresponding data structure is initialized here
+ * This constructor should be called by inherited classes
+*/
TranslationOptionCollection::TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage)
: m_source(src)
,m_futureScore(src.GetSize())
@@ -108,6 +110,22 @@ void TranslationOptionCollection::Prune()
<< "Total translation options pruned: " << totalPruned << std::endl);
}
+/** Force a creation of a translation option where there are none for a particular source position.
+* ie. where a source word has not been translated, create a translation option by
+* 1. not observing the table limits on phrase/generation tables
+* 2. using the handler ProcessUnknownWord()
+* Call this function once translation option collection has been filled with translation options
+*
+* Calling this function for unknown words is complicated by the fact that it must handle different input types.
+* The call stack is
+* Base::ProcessUnknownWord()
+* Inherited::ProcessUnknownWord()
+* Base::ProcessOneUnknownWord()
+*
+* \param decodeStepList list of decoding steps
+* \param factorCollection input sentence with all factors
+*/
+
void TranslationOptionCollection::ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection)
{
size_t size = m_source.GetSize();
@@ -147,11 +165,75 @@ void TranslationOptionCollection::ProcessUnknownWord(const std::list < DecodeSte
}
}
-/** compute the future score matrix used in search */
-void TranslationOptionCollection::CalcFutureScore()
+/** special handling of ONE unknown word. Either temporarily add the word to the translation table,
+ * or drop the translation.
+ * This function should be called by the ProcessOneUnknownWord() in the inherited class
+ * At the moment, this unknown word handler is a bit of a hack: it copies over each factor from source
+ * to target word, or uses the 'UNK' factor.
+ * Ideally, this function should be in a class which can be expanded upon, for example,
+ * to create a morphologically aware handler.
+ *
+ * \param sourceWord the unknown word
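+ * \param sourcePos position of the unknown word in the input; the created translation option covers exactly this position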
+ * \param factorCollection input sentence with all factors
+ */
+void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
+ size_t sourcePos
+ , FactorCollection &factorCollection)
{
- // create future score matrix in a dynamic programming fashion
+ // unknown word, add as trans opt
+
+ size_t isDigit = 0;
+ if (StaticData::Instance()->GetDropUnknown())
+ {
+ const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
+ std::string s = f->ToString();
+ isDigit = s.find_first_of("0123456789");
+ if (isDigit == string::npos)
+ isDigit = 0;
+ else
+ isDigit = 1;
+ // modify the starting bitmap
+ }
+
+ TranslationOption *transOpt;
+ if (! StaticData::Instance()->GetDropUnknown() || isDigit)
+ {
+ // add to dictionary
+ TargetPhrase targetPhrase(Output);
+ Word &targetWord = targetPhrase.AddWord();
+
+ for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
+ {
+ FactorType factorType = static_cast<FactorType>(currFactor);
+
+ const Factor *sourceFactor = sourceWord[currFactor];
+ if (sourceFactor == NULL)
+ targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
+ else
+ targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
+ }
+
+ targetPhrase.SetScore();
+
+ transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
+ }
+ else
+ { // drop source word. create blank trans opt
+ const TargetPhrase targetPhrase(Output);
+ transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
+ }
+ transOpt->CalcScore();
+ Add(transOpt);
+}
+
+/** compute future score matrix in a dynamic programming fashion.
+ * This matrix is used in search.
+ * Call this function once translation option collection has been filled with translation options
+*/
+void TranslationOptionCollection::CalcFutureScore()
+{
// setup the matrix (ignore lower triangle, set upper triangle to -inf
size_t size = m_source.GetSize(); // the width of the matrix
@@ -260,19 +342,20 @@ void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeSt
CalcFutureScore();
}
-/** subroutine for CreateTranslationOptions: collect translation options
- * that exactly cover a specific input span
+/** collect translation options that exactly cover a specific input span.
+ * Called by CreateTranslationOptions() and ProcessUnknownWord()
* \param decodeStepList list of decoding steps
* \param factorCollection input sentence with all factors
* \param startPos first position in input sentence
* \param lastPos last position in input sentence
+ * \param adhereTableLimit whether phrase & generation table limits are adhered to
*/
void TranslationOptionCollection::CreateTranslationOptionsForRange(
const list < DecodeStep* > &decodeStepList
, FactorCollection &factorCollection
, size_t startPos
, size_t endPos
- , bool observeTableLimit)
+ , bool adhereTableLimit)
{
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
@@ -283,7 +366,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
ProcessInitialTranslation(decodeStep, factorCollection
, *oldPtoc
- , startPos, endPos, observeTableLimit );
+ , startPos, endPos, adhereTableLimit );
// do rest of decode steps
size_t totalEarlyPruned = 0;
@@ -304,7 +387,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
, *newPtoc
, factorCollection
, this
- , observeTableLimit);
+ , adhereTableLimit);
}
// last but 1 partial trans not required anymore
totalEarlyPruned += newPtoc->GetPrunedCount();
@@ -330,68 +413,16 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
// cerr << "Early translation options pruned: " << totalEarlyPruned << endl;
}
-
-/** special handling of unknown words: add special translation (or drop) */
-void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,
- size_t sourcePos
- , FactorCollection &factorCollection)
-{
- // unknown word, add as trans opt
-
- size_t isDigit = 0;
- if (StaticData::Instance()->GetDropUnknown())
- {
- const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
- std::string s = f->ToString();
- isDigit = s.find_first_of("0123456789");
- if (isDigit == string::npos)
- isDigit = 0;
- else
- isDigit = 1;
- // modify the starting bitmap
- }
-
- TranslationOption *transOpt;
- if (! StaticData::Instance()->GetDropUnknown() || isDigit)
- {
- // add to dictionary
- TargetPhrase targetPhrase(Output);
- Word &targetWord = targetPhrase.AddWord();
-
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++)
- {
- FactorType factorType = static_cast<FactorType>(currFactor);
-
- const Factor *sourceFactor = sourceWord[currFactor];
- if (sourceFactor == NULL)
- targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
- else
- targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
- }
-
- targetPhrase.SetScore();
-
- transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
- }
- else
- { // drop source word. create blank trans opt
- const TargetPhrase targetPhrase(Output);
- transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos), targetPhrase, 0);
- }
-
- transOpt->CalcScore();
- Add(transOpt);
-}
-
-
-/** initialize list of partial translation options by applying the first translation step */
+/** initialize list of partial translation options by applying the first translation step
+ * Ideally, this function should be in DecodeStepTranslation class
+ */
void TranslationOptionCollection::ProcessInitialTranslation(
const DecodeStep &decodeStep
, FactorCollection &factorCollection
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos
, size_t endPos
- , bool observeTableLimit)
+ , bool adhereTableLimit)
{
const PhraseDictionaryBase &phraseDictionary = decodeStep.GetPhraseDictionary();
const size_t tableLimit = phraseDictionary.GetTableLimit();
@@ -403,7 +434,7 @@ void TranslationOptionCollection::ProcessInitialTranslation(
VERBOSE(3,"[" << m_source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
- iterEnd = (!observeTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase)
{
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index 141e8d6cf..b71e7094e 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -40,7 +40,7 @@ class Word;
typedef std::vector<const TranslationOption*> TranslationOptionList;
-/** Contains all phrase translations applicable to current sentence.
+/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
* conditions (lattices, factored input, normal text, xml markup)
* all lead to the same decoding algorithm: hypotheses are expanded
@@ -48,7 +48,11 @@ typedef std::vector<const TranslationOption*> TranslationOptionList;
*
* The precomputation of a collection of instances of such TranslationOption
* depends on the input condition, but they all are presented to
- * decoding algorithm in the same form, using this class. **/
+ * decoding algorithm in the same form, using this class.
+ *
+ * This class cannot, and should not, be instantiated directly. Instantiate one of the inherited
+ * classes instead, for a particular input type.
+ **/
class TranslationOptionCollection
{
@@ -56,9 +60,9 @@ class TranslationOptionCollection
TranslationOptionCollection(const TranslationOptionCollection&); /*< no copy constructor */
protected:
std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */
- InputType const &m_source;
- SquareMatrix m_futureScore; /*< matrix of future costs for parts of the sentence */
- const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span (phrase) */
+ InputType const &m_source; /*< reference to the input */
+ SquareMatrix m_futureScore; /*< matrix of future costs for contiguous parts (span) of the input */
+ const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span (phrase???) */
FactorCollection *m_factorCollection;
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage);
@@ -68,7 +72,7 @@ protected:
virtual void ProcessInitialTranslation(const DecodeStep &decodeStep
, FactorCollection &factorCollection
, PartialTranslOptColl &outputPartialTranslOptColl
- , size_t startPos, size_t endPos, bool observeTableLimit );
+ , size_t startPos, size_t endPos, bool adhereTableLimit );
void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection);
virtual void ProcessOneUnknownWord(const Word &sourceWord
@@ -86,7 +90,10 @@ protected:
return m_collection[startPos][endPos - startPos];
}
void Add(const TranslationOption *translationOption);
-
+
+ virtual void ProcessUnknownWord(size_t sourcePos
+ , FactorCollection &factorCollection)=0;
+
public:
virtual ~TranslationOptionCollection();
const InputType& GetSource() const { return m_source; }
@@ -94,9 +101,6 @@ public:
// get length/size of source input
size_t GetSize() const;
- virtual void ProcessUnknownWord(size_t sourcePos
- , FactorCollection &factorCollection)=0;
-
virtual void CreateTranslationOptions(const std::list < DecodeStep* > &decodeStepList
, FactorCollection &factorCollection);
@@ -104,7 +108,7 @@ public:
, FactorCollection &factorCollection
, size_t startPosition
, size_t endPosition
- , bool observeTableLimit);
+ , bool adhereTableLimit);
/** returns future cost matrix for sentence */
inline virtual const SquareMatrix &GetFutureScore() const