Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
author: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2007-07-20 17:03:12 +0400
committer: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2007-07-20 17:03:12 +0400
commit: c9563bdcc879b542ad56f4df488cdb8bf13a2eaf (patch)
tree: 91103c61d1dbd8fc17687fa36000c8083eff69a5 /moses
parent: fb606c3bf37df45aa5239d7f3302e1337b9c62fd (diff)
Cache for translation options, for speed-up. Defaults to true.
Only works for sentence input.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1429 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x moses/src/Parameter.cpp 1
-rwxr-xr-x moses/src/StaticData.cpp 37
-rwxr-xr-x moses/src/StaticData.h 15
-rwxr-xr-x moses/src/TranslationOption.cpp 12
-rwxr-xr-x moses/src/TranslationOption.h 3
-rw-r--r-- moses/src/TranslationOptionCollection.cpp 104
-rwxr-xr-x moses/src/TranslationOptionCollection.h 2
-rwxr-xr-x moses/src/TypeDef.h 4
8 files changed, 142 insertions, 36 deletions
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index 2d7b8bf9c..ef6e3986a 100755
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -76,6 +76,7 @@ Parameter::Parameter()
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
AddParam("mbr-scale", "scaling factor to convert log linear score into a probability.");
AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)");
+ AddParam("use-persistent-cache", "cache translation options across sentences (default=true)");
}
Parameter::~Parameter()
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index fb2c68b5a..74f2f6e17 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -133,7 +133,18 @@ bool StaticData::LoadData(Parameter *parameter)
// print all factors of output translations
SetBooleanParameter( &m_reportAllFactors, "report-all-factors", false );
-
+
+ // persistent translation option cache: only enabled for plain sentence input
+ if (m_inputType == SentenceInput)
+ {
+ SetBooleanParameter( &m_useTransOptCache, "use-persistent-cache", false );
+ }
+ else
+ {
+ m_useTransOptCache = false;
+ }
+
+
//input factors
const vector<string> &inputFactorVector = m_parameter->GetParam("input-factors");
for(size_t i=0; i<inputFactorVector.size(); i++)
@@ -285,6 +296,19 @@ StaticData::~StaticData()
}
}
+ // delete trans opt
+ map<Phrase, std::vector<TranslationOption*> >::iterator iterCache;
+ for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache)
+ {
+ TranslationOptionList &transOptList = iterCache->second;
+
+ TranslationOptionList::iterator iterTransOpt;
+ for (iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt)
+ {
+ delete *iterTransOpt;
+ }
+ }
+
RemoveAllInColl(m_reorderModels);
// small score producers
@@ -823,3 +847,14 @@ void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::
for (size_t i = begin; i < end; i++)
m_allWeights[i] = *weightIter++;
}
+
+/** Look up the translation options stored in the cache for a source phrase.
+ * \param sourcePhrase phrase used as the cache key
+ * \return pointer to the cached list, or NULL when the phrase has no entry.
+ *         The pointer refers to the list held inside m_transOptCache.
+ */
+const TranslationOptionList* StaticData::FindTransOptListInCache(const Phrase &sourcePhrase) const
+{
+  typedef std::map<Phrase, TranslationOptionList>::const_iterator CacheIter;
+
+  const CacheIter cacheHit = m_transOptCache.find(sourcePhrase);
+  return (cacheHit != m_transOptCache.end()) ? &(cacheHit->second) : NULL;
+}
+
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 14c805523..4c1c51551 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -111,6 +111,10 @@ protected:
XmlInputType m_xmlInputType; //! method for handling sentence XML input
DecoderType m_decoderType; //! MAP or MBR decoder
+
+ bool m_useTransOptCache;
+ mutable std::map<Phrase, TranslationOptionList> m_transOptCache;
+
float m_mbrScale; //! Scaling factor for computing marginal probability of candidate translation
//! constructor. only the 1 static variable can be created
StaticData();
@@ -134,6 +138,7 @@ protected:
bool LoadLexicalReorderingModel();
public:
+
//! destructor
~StaticData();
//! return static instance for use like global variable
@@ -337,5 +342,13 @@ public:
float GetMBRScale() const {return m_mbrScale;}
XmlInputType GetXmlInputType() const { return m_xmlInputType; }
-
+
+ //! true if translation options should be looked up in, and stored to, the cache
+ bool GetUseTransOptCache() const
+ {
+   return m_useTransOptCache;
+ }
+
+ /** Store a list of translation options in the cache under a source phrase.
+  * The vector of pointers is copied; the pointed-to objects are not.
+  * Callable on a const object because m_transOptCache is declared mutable.
+  * \param sourcePhrase phrase used as the cache key
+  * \param transOptList list of translation option pointers to store
+  */
+ void AddTransOptListToCache(const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const
+ {
+   // operator[] creates the entry if it is missing, then the list is assigned over it.
+   // NOTE(review): pointers held by an existing entry are not deleted here --
+   // confirm callers only add phrases that are not already cached.
+   TranslationOptionList &cacheEntry = m_transOptCache[sourcePhrase];
+   cacheEntry = transOptList;
+ }
+
+ const TranslationOptionList* FindTransOptListInCache(const Phrase &sourcePhrase) const;
};
diff --git a/moses/src/TranslationOption.cpp b/moses/src/TranslationOption.cpp
index b9751bb75..374907b74 100755
--- a/moses/src/TranslationOption.cpp
+++ b/moses/src/TranslationOption.cpp
@@ -89,6 +89,18 @@ TranslationOption::TranslationOption(const TranslationOption &copy)
, m_reordering(copy.m_reordering)
{}
+/** Copy constructor variant used by caching: duplicates the target phrase,
+ * scores and reordering of \c copy, but stores \c sourceWordsRange as the
+ * source words range instead of the one held by \c copy.
+ * \param copy translation option to duplicate
+ * \param sourceWordsRange words range stored in the new object
+ */
+TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
+: m_targetPhrase(copy.m_targetPhrase),
+  // TODO use new Phrase(*copy.m_sourcePhrase) unconditionally when confusion network trans opt for confusion net properly implemented
+  // a new Phrase is always allocated: a copy of the original's source phrase when it has one, otherwise Phrase(Input)
+  m_sourcePhrase( (copy.m_sourcePhrase != NULL) ? new Phrase(*copy.m_sourcePhrase) : new Phrase(Input) ),
+  m_sourceWordsRange(sourceWordsRange),
+  m_totalScore(copy.m_totalScore),
+  m_futureScore(copy.m_futureScore),
+  m_partialScore(copy.m_partialScore),
+  m_scoreBreakdown(copy.m_scoreBreakdown),
+  m_reordering(copy.m_reordering)
+{
+}
+
void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToAdd)
{
assert(phrase.GetSize() == m_targetPhrase.GetSize());
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
index cc74f12a0..a26b3d7a1 100755
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@@ -83,6 +83,9 @@ public:
/** copy constructor */
TranslationOption(const TranslationOption &copy);
+ /** copy constructor, but change words range. used by caching */
+ TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange);
+
~TranslationOption()
{
delete m_sourcePhrase;
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index 83dd0e608..acc31dedf 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -56,7 +56,6 @@ TranslationOptionCollection::TranslationOptionCollection(InputType const& src, s
/** destructor, clears out data structures */
TranslationOptionCollection::~TranslationOptionCollection()
{
- // delete all trans opt
size_t size = m_source.GetSize();
for (size_t startPos = 0 ; startPos < size ; ++startPos)
{
@@ -65,6 +64,7 @@ TranslationOptionCollection::~TranslationOptionCollection()
RemoveAllInColl(GetTranslationOptionList(startPos, endPos));
}
}
+
RemoveAllInColl(m_unksrcs);
}
@@ -352,31 +352,47 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
, size_t startPos
, size_t endPos
, bool adhereTableLimit)
-{
-
- if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos))
+{
+ if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
{
-
- CreateXmlOptionsForRange(startPos, endPos);
+ Phrase *sourcePhrase = NULL; // can't initialise with substring, in case it's confusion network
- }
-
- if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
- {
- // partial trans opt stored in here
- PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
- size_t totalEarlyPruned = 0;
-
- // initial translation step
- list <const DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
- const DecodeStep &decodeStep = **iterStep;
+ // consult persistent (cross-sentence) cache for stored translation options
+ bool skipTransOptCreation = false;
+ if (StaticData::Instance().GetUseTransOptCache())
+ {
+ const WordsRange wordsRange(startPos, endPos);
+ sourcePhrase = new Phrase(m_source.GetSubString(wordsRange));
+
+ const TranslationOptionList *transOptList = StaticData::Instance().FindTransOptListInCache(*sourcePhrase);
+ // is phrase in cache?
+ if (transOptList != NULL) {
+ skipTransOptCreation = true;
+ TranslationOptionList::const_iterator iterTransOpt;
+ for (iterTransOpt = transOptList->begin() ; iterTransOpt != transOptList->end() ; ++iterTransOpt)
+ {
+ TranslationOption *transOpt = new TranslationOption(**iterTransOpt, wordsRange);
+ Add(transOpt);
+ }
+ }
+ }
- ProcessInitialTranslation(decodeStep, *oldPtoc
- , startPos, endPos, adhereTableLimit );
+ if (!skipTransOptCreation)
+ {
+ // partial trans opt stored in here
+ PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
+ size_t totalEarlyPruned = 0;
+
+ // initial translation step
+ list <const DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
+ const DecodeStep &decodeStep = **iterStep;
- // do rest of decode steps
- int indexStep = 0;
- for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep)
+ ProcessInitialTranslation(decodeStep, *oldPtoc
+ , startPos, endPos, adhereTableLimit );
+
+ // do rest of decode steps
+ int indexStep = 0;
+ for (++iterStep ; iterStep != decodeStepList.end() ; ++iterStep)
{
const DecodeStep &decodeStep = **iterStep;
PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
@@ -399,23 +415,47 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
oldPtoc = newPtoc;
indexStep++;
} // for (++iterStep
- // add to fully formed translation option list
- PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
- const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
- vector<TranslationOption*>::const_iterator iterColl;
- for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)
+
+ // add to fully formed translation option list
+ PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
+ const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
+ vector<TranslationOption*>::const_iterator iterColl;
+ for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)
{
TranslationOption *transOpt = *iterColl;
transOpt->CalcScore();
Add(transOpt);
}
- lastPartialTranslOptColl.DetachAll();
- totalEarlyPruned += oldPtoc->GetPrunedCount();
- delete oldPtoc;
- // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
+ // storing translation options in persistent cache (kept across sentences)
+ if (StaticData::Instance().GetUseTransOptCache())
+ {
+ if (partTransOptList.size() > 0)
+ {
+ vector<TranslationOption*> cachedTransOptList = GetTranslationOptionList(startPos, endPos);
+ vector<TranslationOption*>::iterator iterList;
+ for (size_t i = 0 ; i < cachedTransOptList.size() ; ++i)
+ {
+ cachedTransOptList[i] = new TranslationOption(*cachedTransOptList[i]);
+ }
+
+ StaticData::Instance().AddTransOptListToCache(*sourcePhrase, cachedTransOptList);
+ }
+
+ delete sourcePhrase;
+ }
+
+ lastPartialTranslOptColl.DetachAll();
+ totalEarlyPruned += oldPtoc->GetPrunedCount();
+ delete oldPtoc;
+ // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
+ } // if (!skipTransOptCreation)
+ } // if non-exclusive XML or no options for range
- } //if non-exclusive XML or no options for range
+ if ((StaticData::Instance().GetXmlInputType() != XmlPassThrough) && HasXmlOptionsOverlappingRange(startPos,endPos))
+ {
+ CreateXmlOptionsForRange(startPos, endPos);
+ }
}
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index 012f73cdf..6a0fe7216 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -38,8 +38,6 @@ class LMList;
class FactorMask;
class Word;
-typedef std::vector<TranslationOption*> TranslationOptionList;
-
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
* conditions (trelliss, factored input, normal text, xml markup)
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index 2a7714360..0a7738032 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <list>
#include <limits>
+#include <vector>
#define PROJECT_NAME "moses"
@@ -161,3 +162,6 @@ typedef size_t FactorType;
typedef uint32_t UINT32;
#endif
+// forward declaration: the typedef below only needs the class name
+class TranslationOption;
+//! vector of pointers to TranslationOption objects
+typedef std::vector<TranslationOption*> TranslationOptionList;
+