Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> 2007-07-27 16:08:59 +0400
committer: phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> 2007-07-27 16:08:59 +0400
commit: dde35d466fd57a9ff70652016aa663bc2982ccf3 (patch)
tree: 10db0146d8e7aa6416c99274598d5a46bea97637 /moses
parent: 1cb5ff709efa884b6fe2c89f6510c9fc03935f06 (diff)
performance fix: saner behaviour for very very long sentences (hundreds of words) by using a maximum phrase length, which is set to 20 by default and can be changed with the switch "-max-phrase-length".
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1440 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x moses/src/Manager.cpp 12
-rwxr-xr-x moses/src/Parameter.cpp 1
-rwxr-xr-x moses/src/StaticData.cpp 3
-rwxr-xr-x moses/src/StaticData.h 7
-rw-r--r-- moses/src/TranslationOptionCollection.cpp 55
-rwxr-xr-x moses/src/TranslationOptionCollection.h 4
-rwxr-xr-x moses/src/TypeDef.h 1
7 files changed, 68 insertions, 15 deletions
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index 3223442b7..b68c75582 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -136,7 +136,11 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
{
- for (size_t endPos = startPos ; endPos < sourceSize ; ++endPos)
+ size_t maxSize = sourceSize - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
if (!hypoBitmap.Overlap(WordsRange(startPos, endPos)))
{
@@ -159,7 +163,11 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
// MAIN LOOP. go through each possible hypo
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
{
- for (size_t endPos = startPos ; endPos < sourceSize ; ++endPos)
+ size_t maxSize = sourceSize - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
// no gap so far => don't skip more than allowed limit
if (hypoFirstGapPos == hypoWordCount)
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index ef6e3986a..e393ac27f 100755
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -49,6 +49,7 @@ Parameter::Parameter()
AddParam("mapping", "description of decoding steps");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
+ AddParam("max-phrase-length", "maximum phrase length (default 20)");
AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
AddParam("output-factors", "list of factors in the output");
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 6080ed55d..bdc59a4ad 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -218,6 +218,9 @@ bool StaticData::LoadData(Parameter *parameter)
m_maxNoPartTransOpt = (m_parameter->GetParam("max-partial-trans-opt").size() > 0)
? Scan<size_t>(m_parameter->GetParam("max-partial-trans-opt")[0]) : DEFAULT_MAX_PART_TRANS_OPT_SIZE;
+ m_maxPhraseLength = (m_parameter->GetParam("max-phrase-length").size() > 0)
+ ? Scan<size_t>(m_parameter->GetParam("max-phrase-length")[0]) : DEFAULT_MAX_PHRASE_LENGTH;
+
// Unknown Word Processing -- wade
//TODO replace this w/general word dropping -- EVH
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 5f35cb64f..ca422438d 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -77,7 +77,8 @@ protected:
, m_nBestSize
, m_nBestFactor
, m_maxNoTransOptPerCoverage
- , m_maxNoPartTransOpt;
+ , m_maxNoPartTransOpt
+ , m_maxPhraseLength;
std::string m_nBestFilePath;
bool m_fLMsLoaded, m_labeledNBestList,m_nBestIncludesAlignment;
@@ -204,6 +205,10 @@ public:
{
return m_maxNoPartTransOpt;
}
+ inline size_t GetMaxPhraseLength() const
+ {
+ return m_maxPhraseLength;
+ }
std::vector<LexicalReordering*> GetReorderModels() const
{
return m_reorderModels;
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index b3e60f8f2..4f8c56eb7 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -48,7 +48,12 @@ TranslationOptionCollection::TranslationOptionCollection(InputType const& src, s
for (size_t startPos = 0 ; startPos < size ; ++startPos)
{
m_collection.push_back( vector< TranslationOptionList >() );
- for (size_t endPos = startPos ; endPos < size ; ++endPos)
+
+ size_t maxSize = size - startPos + 1;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
m_collection[startPos].push_back( TranslationOptionList() );
}
@@ -61,7 +66,11 @@ TranslationOptionCollection::~TranslationOptionCollection()
size_t size = m_source.GetSize();
for (size_t startPos = 0 ; startPos < size ; ++startPos)
{
- for (size_t endPos = startPos ; endPos < size ; ++endPos)
+ size_t maxSize = size - startPos + 1;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
RemoveAllInColl(GetTranslationOptionList(startPos, endPos));
}
@@ -86,9 +95,13 @@ void TranslationOptionCollection::Prune()
size_t total = 0;
size_t totalPruned = 0;
- for (size_t startPos = 0 ; startPos < size ; ++startPos)
+ for (size_t startPos = 0 ; startPos < size; ++startPos)
{
- for (size_t endPos = startPos ; endPos < size ; ++endPos)
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
TranslationOptionList &fullList = GetTranslationOptionList(startPos, endPos);
total += fullList.size();
@@ -240,7 +253,11 @@ void TranslationOptionCollection::CalcFutureScore()
// walk all the translation options and record the cheapest option for each span
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; ++startPos)
{
- for (size_t endPos = startPos ; endPos < m_source.GetSize() ; ++endPos)
+ size_t maxSize = m_source.GetSize() - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
@@ -284,7 +301,11 @@ void TranslationOptionCollection::CalcFutureScore()
int total = 0;
for(size_t row=0; row<size; row++)
{
- for(size_t col=row; col<size; col++)
+ size_t maxSize = size - row;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for(size_t col=row; col<row+maxSize; col++)
{
int count = GetTranslationOptionList(row, col).size();
TRACE_ERR( "translation options spanning from "
@@ -320,7 +341,11 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeG
const DecodeGraph &decodeStepList = decodeStepVL[startVL];
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
{
- for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
+ size_t maxSize = m_source.GetSize() - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++)
{
CreateTranslationOptionsForRange( decodeStepList, startPos, endPos, true);
}
@@ -471,7 +496,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
* \param lastPos last position in input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
*/
- bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const {
+ bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const {
return false;
}
@@ -481,7 +506,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
* \param startPos first position in input sentence
* \param lastPos last position in input sentence
*/
- void TranslationOptionCollection::CreateXmlOptionsForRange(size_t startPosition, size_t endPosition) {
+ void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t) {
//not implemented for base class
};
@@ -503,7 +528,11 @@ inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollec
size_t size = coll.GetSize();
for (size_t startPos = 0 ; startPos < size ; ++startPos)
{
- for (size_t endPos = startPos ; endPos < size ; ++endPos)
+ size_t maxSize = size - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos);
size_t sizeFull = fullList.size();
@@ -535,7 +564,11 @@ void TranslationOptionCollection::CacheLexReordering()
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
{
- for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
+ size_t maxSize = m_source.GetSize() - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
+ for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++)
{
TranslationOptionList &transOptList = GetTranslationOptionList( startPos, endPos);
TranslationOptionList::iterator iterTransOpt;
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index 495528980..403ebd46b 100755
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -80,11 +80,13 @@ protected:
//! list of trans opt for a particular span
TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos)
{
+ assert(endPos-startPos < m_collection[startPos].size());
return m_collection[startPos][endPos - startPos];
}
const TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos) const
{
- return m_collection[startPos][endPos - startPos];
+ assert(endPos-startPos < m_collection[startPos].size());
+ return m_collection[startPos][endPos - startPos];
}
void Add(TranslationOption *translationOption);
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index 0a7738032..d24ceab05 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -43,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 50;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
+const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
const size_t ARRAY_SIZE_INCR = 10; //amount by which a phrase gets resized when necessary
const float LOWEST_SCORE = -100.0f;
const float DEFAULT_BEAM_THRESHOLD = 0.00001f;