Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-02-13 16:37:57 +0300
committer Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-02-13 16:37:57 +0300
commit 1ccd7b55d5fde335993b20a238f9cb613682bb48 (patch)
tree 3bad56e5350c1df24f4f88b6ba0a1e99dbdb373b /moses
parent 63299df44737134f45533479723b6535a7d3a6da (diff)
parent 34b139e2aea46706e6714801108464f3ab196a86 (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
Diffstat (limited to 'moses')
-rw-r--r-- moses/ConfusionNet.cpp 2
-rw-r--r-- moses/FF/BleuScoreFeature.cpp 2
-rw-r--r-- moses/FF/OSM-Feature/OpSequenceModel.cpp 10
-rw-r--r-- moses/FF/StatefulFeatureFunction.h 4
-rw-r--r-- moses/SearchCubePruning.cpp 69
-rw-r--r-- moses/SearchNormal.cpp 252
-rw-r--r-- moses/Sentence.cpp 6
-rw-r--r-- moses/StaticData.cpp 24
-rw-r--r-- moses/StaticData.h 9
-rw-r--r-- moses/TranslationOption.h 11
-rw-r--r-- moses/TranslationOptionCollection.cpp 1239
-rw-r--r-- moses/TranslationOptionCollection.h 38
-rw-r--r-- moses/TranslationOptionCollectionConfusionNet.cpp 69
-rw-r--r-- moses/TranslationOptionCollectionConfusionNet.h 14
-rw-r--r-- moses/TranslationOptionCollectionLattice.cpp 18
-rw-r--r-- moses/TranslationOptionCollectionLattice.h 9
-rw-r--r-- moses/TranslationOptionCollectionText.cpp 23
-rw-r--r-- moses/TranslationOptionCollectionText.h 2
-rw-r--r-- moses/TranslationOptionList.cpp 95
-rw-r--r-- moses/TranslationOptionList.h 113
-rw-r--r-- moses/TranslationTask.cpp 2
-rw-r--r-- moses/TypeDef.h 3
-rw-r--r-- moses/Util.h 14
23 files changed, 1041 insertions, 987 deletions
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index ce0d5e1c0..f6d7b4168 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -67,7 +67,7 @@ ConfusionNet()
stats.createOne();
const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 5be3b0b6b..24887c373 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -27,7 +27,7 @@ int BleuScoreState::Compare(const FFState& o) const
if (&o == this)
return 0;
- if (StaticData::Instance().IsChart())
+ if (StaticData::Instance().IsSyntax())
return 0;
const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index 43ed5f346..d4e2f8719 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -66,14 +66,14 @@ void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
alignments.push_back(iter->second);
}
- for (int i = 0; i < targetPhrase.GetSize(); i++) {
+ for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
if (targetPhrase.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0)
myTargetPhrase.push_back("_TRANS_SLF_");
else
myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
}
- for (int i = 0; i < source.GetSize(); i++) {
+ for (size_t i = 0; i < source.GetSize(); i++) {
mySourcePhrase.push_back(source.GetWord(i).GetFactor(sFactor)->GetString().as_string());
}
@@ -97,7 +97,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
WordsBitmap myBitmap = bitmap;
const Manager &manager = cur_hypo.GetManager();
const InputType &source = manager.GetSource();
- const Sentence &sourceSentence = static_cast<const Sentence&>(source);
+ // const Sentence &sourceSentence = static_cast<const Sentence&>(source);
osmHypothesis obj;
vector <string> mySourcePhrase;
vector <string> myTargetPhrase;
@@ -124,7 +124,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
int startIndex = sourceRange.GetStartPos();
int endIndex = sourceRange.GetEndPos();
const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
- osmState * statePtr;
+ // osmState * statePtr;
vector <int> alignments;
@@ -149,7 +149,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
// cerr<<mySourcePhrase[i]<<endl;
}
- for (int i = 0; i < target.GetSize(); i++) {
+ for (size_t i = 0; i < target.GetSize(); i++) {
if (target.GetWord(i).IsOOV() && sFactor == 0 && tFactor == 0)
myTargetPhrase.push_back("_TRANS_SLF_");
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index 08b7c607d..f54f3a746 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -17,7 +17,9 @@ class StatefulFeatureFunction: public FeatureFunction
static std::vector<const StatefulFeatureFunction*> m_statefulFFs;
public:
- static const std::vector<const StatefulFeatureFunction*>& GetStatefulFeatureFunctions() {
+ static const std::vector<const StatefulFeatureFunction*>&
+ GetStatefulFeatureFunctions()
+ {
return m_statefulFFs;
}
diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp
index 2595e35ab..6c981276e 100644
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -214,47 +214,49 @@ void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack)
}
}
-void SearchCubePruning::CreateForwardTodos(const WordsBitmap &bitmap, const WordsRange &range, BitmapContainer &bitmapContainer)
+void
+SearchCubePruning::
+CreateForwardTodos(WordsBitmap const& bitmap, WordsRange const& range,
+ BitmapContainer& bitmapContainer)
{
WordsBitmap newBitmap = bitmap;
newBitmap.SetValue(range.GetStartPos(), range.GetEndPos(), true);
-
+
size_t numCovered = newBitmap.GetNumWordsCovered();
- const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(range);
+ const TranslationOptionList* transOptList;
+ transOptList = m_transOptColl.GetTranslationOptionList(range);
const SquareMatrix &futureScore = m_transOptColl.GetFutureScore();
- if (transOptList.size() > 0) {
- HypothesisStackCubePruning &newStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl[numCovered]);
- newStack.SetBitmapAccessor(newBitmap, newStack, range, bitmapContainer, futureScore, transOptList);
+ if (transOptList && transOptList->size() > 0) {
+ HypothesisStackCubePruning& newStack
+ = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl[numCovered]);
+ newStack.SetBitmapAccessor(newBitmap, newStack, range, bitmapContainer,
+ futureScore, *transOptList);
}
}
-
-bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const WordsRange &range) const
+
+bool
+SearchCubePruning::
+CheckDistortion(const WordsBitmap &hypoBitmap, const WordsRange &range) const
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = StaticData::Instance().GetMaxDistortion();
-
+ if (maxDistortion < 0) return true;
+
// if there are reordering limits, make sure it is not violated
// the coverage bitmap is handy here (and the position of the first gap)
- const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
- , startPos = range.GetStartPos()
- , endPos = range.GetEndPos();
+ size_t const startPos = range.GetStartPos();
+ size_t const endPos = range.GetEndPos();
- // if reordering constraints are used (--monotone-at-punctuation or xml), check if passes all
- if (! m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) ) {
+ // if reordering constraints are used (--monotone-at-punctuation or xml),
+ // check if passes all
+ if (!m_source.GetReorderingConstraint().Check(hypoBitmap, startPos, endPos))
return false;
- }
-
- // no limit of reordering: no problem
- if (maxDistortion < 0) {
- return true;
- }
-
- bool leftMostEdge = (hypoFirstGapPos == startPos);
+
+ size_t const hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
// any length extension is okay if starting at left-most edge
- if (leftMostEdge) {
- return true;
- }
+ if (hypoFirstGapPos == startPos) return true;
+
// starting somewhere other than left-most edge, use caution
// the basic idea is this: we would like to translate a phrase starting
// from a position further right than the left-most open gap. The
@@ -264,20 +266,17 @@ bool SearchCubePruning::CheckDistortion(const WordsBitmap &hypoBitmap, const Wor
// hypothesis starting at the left-most edge). If this vlaue is than
// the distortion limit, we don't allow this extension to be made.
WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
- int required_distortion =
- m_source.ComputeDistortionDistance(range, bestNextExtension);
-
- if (required_distortion > maxDistortion) {
- return false;
- }
- return true;
+ return (m_source.ComputeDistortionDistance(range, bestNextExtension)
+ <= maxDistortion);
}
/**
* Find best hypothesis on the last stack.
* This is the end point of the best translation, which can be traced back from here
*/
-const Hypothesis *SearchCubePruning::GetBestHypothesis() const
+Hypothesis const*
+SearchCubePruning::
+GetBestHypothesis() const
{
// const HypothesisStackCubePruning &hypoColl = m_hypoStackColl.back();
const HypothesisStack &hypoColl = *m_hypoStackColl.back();
@@ -287,7 +286,9 @@ const Hypothesis *SearchCubePruning::GetBestHypothesis() const
/**
* Logging of hypothesis stack sizes
*/
-void SearchCubePruning::OutputHypoStackSize()
+void
+SearchCubePruning::
+OutputHypoStackSize()
{
std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin();
TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size());
diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp
index b3e647299..786b554c6 100644
--- a/moses/SearchNormal.cpp
+++ b/moses/SearchNormal.cpp
@@ -3,6 +3,8 @@
#include "SearchNormal.h"
#include "SentenceStats.h"
+#include <boost/foreach.hpp>
+
using namespace std;
namespace Moses
@@ -104,136 +106,140 @@ void SearchNormal::Decode()
* violation of reordering limits.
* \param hypothesis hypothesis to be expanded upon
*/
-void SearchNormal::ProcessOneHypothesis(const Hypothesis &hypothesis)
+void
+SearchNormal::
+ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = StaticData::Instance().GetMaxDistortion();
bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
+ const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
+ const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
+ size_t const sourceSize = m_source.GetSize();
+
+ ReorderingConstraint const&
+ ReoConstraint = m_source.GetReorderingConstraint();
+
// no limit of reordering: only check for overlap
if (maxDistortion < 0) {
- const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
- const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
- , sourceSize = m_source.GetSize();
-
- for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
- size_t maxSize = sourceSize - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- // basic checks
- // there have to be translation options
- if (m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos)).size() == 0 ||
- // no overlap with existing words
- hypoBitmap.Overlap(WordsRange(startPos, endPos)) ||
- // specified reordering constraints (set with -monotone-at-punctuation or xml)
- !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) ) {
- continue;
- }
-
- //TODO: does this method include incompatible WordLattice hypotheses?
- ExpandAllHypotheses(hypothesis, startPos, endPos);
+ for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+ {
+ TranslationOptionList const* tol;
+ size_t endPos = startPos;
+ for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
+ tol && endPos < sourceSize;
+ tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos))
+ {
+ if (tol->size() == 0
+ || hypoBitmap.Overlap(WordsRange(startPos, endPos))
+ || !ReoConstraint.Check(hypoBitmap, startPos, endPos))
+ { continue; }
+
+ //TODO: does this method include incompatible WordLattice hypotheses?
+ ExpandAllHypotheses(hypothesis, startPos, endPos);
+ }
}
- }
-
return; // done with special case (no reordering limit)
}
- // if there are reordering limits, make sure it is not violated
- // the coverage bitmap is handy here (and the position of the first gap)
- const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
- const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
- , sourceSize = m_source.GetSize();
-
- // MAIN LOOP. go through each possible range
- for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
- // don't bother expanding phrases if the first position is already taken
- if(hypoBitmap.GetValue(startPos))
- continue;
-
- WordsRange prevRange = hypothesis.GetCurrSourceWordsRange();
-
- size_t maxSize = sourceSize - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
- size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
- if (isWordLattice) {
- // first question: is there a path from the closest translated word to the left
- // of the hypothesized extension to the start of the hypothesized extension?
- // long version: is there anything to our left? is it farther left than where we're starting anyway? can we get to it?
- // closestLeft is exclusive: a value of 3 means 2 is covered, our arc is currently ENDING at 3 and can start at 3 implicitly
- if (closestLeft != 0 && closestLeft != startPos && !m_source.CanIGetFromAToB(closestLeft, startPos)) {
- continue;
- }
- if (prevRange.GetStartPos() != NOT_FOUND &&
- prevRange.GetStartPos() > startPos && !m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos())) {
- continue;
- }
- }
+ // There are reordering limits. Make sure they are not violated.
- WordsRange currentStartRange(startPos, startPos);
- if(m_source.ComputeDistortionDistance(prevRange, currentStartRange) > maxDistortion)
- continue;
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- // basic checks
- WordsRange extRange(startPos, endPos);
- // there have to be translation options
- if (m_transOptColl.GetTranslationOptionList(extRange).size() == 0 ||
- // no overlap with existing words
- hypoBitmap.Overlap(extRange) ||
- // specified reordering constraints (set with -monotone-at-punctuation or xml)
- !m_source.GetReorderingConstraint().Check( hypoBitmap, startPos, endPos ) || //
- // connection in input word lattice
- (isWordLattice && !m_source.IsCoveragePossible(extRange))) {
- continue;
- }
-
- // ask second question here:
- // we already know we can get to our starting point from the closest thing to the left. We now ask the follow up:
- // can we get from our end to the closest thing on the right?
- // long version: is anything to our right? is it farther right than our (inclusive) end? can our end reach it?
- bool leftMostEdge = (hypoFirstGapPos == startPos);
-
- // closest right definition:
- size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
- if (isWordLattice) {
- //if (!leftMostEdge && closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight + 1)) {
- if (closestRight != endPos && ((closestRight + 1) < sourceSize) && !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1)) {
- continue;
- }
- }
+ WordsRange prevRange = hypothesis.GetCurrSourceWordsRange();
+ for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
+ {
+
+ // don't bother expanding phrases if the first position is already taken
+ if(hypoBitmap.GetValue(startPos)) continue;
- // any length extension is okay if starting at left-most edge
- if (leftMostEdge) {
- ExpandAllHypotheses(hypothesis, startPos, endPos);
- }
- // starting somewhere other than left-most edge, use caution
- else {
- // the basic idea is this: we would like to translate a phrase starting
- // from a position further right than the left-most open gap. The
- // distortion penalty for the following phrase will be computed relative
- // to the ending position of the current extension, so we ask now what
- // its maximum value will be (which will always be the value of the
- // hypothesis starting at the left-most edge). If this value is less than
- // the distortion limit, we don't allow this extension to be made.
- WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
- int required_distortion =
- m_source.ComputeDistortionDistance(extRange, bestNextExtension);
-
- if (required_distortion > maxDistortion) {
- continue;
- }
-
- // everything is fine, we're good to go
- ExpandAllHypotheses(hypothesis, startPos, endPos);
-
- }
+ size_t maxSize = sourceSize - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+ size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
+
+ if (isWordLattice)
+ {
+ // first question: is there a path from the closest translated word to the left
+ // of the hypothesized extension to the start of the hypothesized extension?
+ // long version:
+ // - is there anything to our left?
+ // - is it farther left than where we're starting anyway?
+ // - can we get to it?
+
+ // closestLeft is exclusive: a value of 3 means 2 is covered, our
+ // arc is currently ENDING at 3 and can start at 3 implicitly
+ if (closestLeft != 0 && closestLeft != startPos
+ && !m_source.CanIGetFromAToB(closestLeft, startPos))
+ continue;
+
+ if (prevRange.GetStartPos() != NOT_FOUND &&
+ prevRange.GetStartPos() > startPos &&
+ !m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos()))
+ continue;
+ }
+
+ WordsRange currentStartRange(startPos, startPos);
+ if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
+ > maxDistortion)
+ continue;
+
+ TranslationOptionList const* tol;
+ size_t endPos = startPos;
+ for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
+ tol && endPos < sourceSize;
+ tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos))
+ {
+ WordsRange extRange(startPos, endPos);
+ if (tol->size() == 0
+ || hypoBitmap.Overlap(extRange)
+ || !ReoConstraint.Check(hypoBitmap, startPos, endPos)
+ || (isWordLattice && !m_source.IsCoveragePossible(extRange)))
+ { continue; }
+
+ // ask second question here: we already know we can get to our
+ // starting point from the closest thing to the left. We now ask the
+ // follow up: can we get from our end to the closest thing on the
+ // right?
+ //
+ // long version: is anything to our right? is it farther
+ // right than our (inclusive) end? can our end reach it?
+ bool isLeftMostEdge = (hypoFirstGapPos == startPos);
+
+ size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
+ if (isWordLattice) {
+ if (closestRight != endPos
+ && ((closestRight + 1) < sourceSize)
+ && !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1))
+ { continue; }
+ }
+
+ if (isLeftMostEdge)
+ { // any length extension is okay if starting at left-most edge
+ ExpandAllHypotheses(hypothesis, startPos, endPos);
+ }
+ else // starting somewhere other than left-most edge, use caution
+ {
+ // the basic idea is this: we would like to translate a phrase
+ // starting from a position further right than the left-most
+ // open gap. The distortion penalty for the following phrase
+ // will be computed relative to the ending position of the
+ // current extension, so we ask now what its maximum value will
+ // be (which will always be the value of the hypothesis starting
+ // at the left-most edge). If this value is greater than the
+ // distortion limit, we don't allow this extension to be made.
+ WordsRange bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
+
+ if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
+ > maxDistortion) continue;
+
+ // everything is fine, we're good to go
+ ExpandAllHypotheses(hypothesis, startPos, endPos);
+ }
+ }
}
- }
}
-
+
/**
* Expand a hypothesis given a list of translation options
@@ -242,7 +248,9 @@ void SearchNormal::ProcessOneHypothesis(const Hypothesis &hypothesis)
* \param endPos last word position of span covered
*/
-void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
+void
+SearchNormal::
+ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
{
// early discarding: check if hypothesis is too bad to build
// this idea is explained in (Moore&Quirk, MT Summit 2007)
@@ -250,15 +258,19 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star
if (StaticData::Instance().UseEarlyDiscarding()) {
// expected score is based on score of current hypothesis
expectedScore = hypothesis.GetScore();
-
+
// add new future score estimate
- expectedScore += m_transOptColl.GetFutureScore().CalcFutureScore( hypothesis.GetWordsBitmap(), startPos, endPos );
+ expectedScore +=
+ m_transOptColl.GetFutureScore()
+ .CalcFutureScore(hypothesis.GetWordsBitmap(), startPos, endPos);
}
-
+
// loop through all translation options
- const TranslationOptionList &transOptList = m_transOptColl.GetTranslationOptionList(WordsRange(startPos, endPos));
+ const TranslationOptionList* tol
+ = m_transOptColl.GetTranslationOptionList(startPos, endPos);
+ if (!tol) return;
TranslationOptionList::const_iterator iter;
- for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter) {
+ for (iter = tol->begin() ; iter != tol->end() ; ++iter) {
ExpandHypothesis(hypothesis, **iter, expectedScore);
}
}
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 58d650aa3..a937f21e3 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -43,7 +43,7 @@ Sentence::Sentence()
, InputType()
{
const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
}
@@ -168,7 +168,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if (staticData.GetXmlInputType() != XmlPassThrough) {
int offset = 0;
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
offset = 1;
}
@@ -188,7 +188,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
// placeholders
ProcessPlaceholders(placeholders);
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
InitStartEndWord();
}
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 94d5381f5..8709d758f 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -60,15 +60,15 @@ bool g_mosesDebug = false;
StaticData StaticData::s_instance;
StaticData::StaticData()
- :m_sourceStartPosMattersForRecombination(false)
- ,m_inputType(SentenceInput)
- ,m_onlyDistinctNBest(false)
- ,m_needAlignmentInfo(false)
- ,m_lmEnableOOVFeature(false)
- ,m_isAlwaysCreateDirectTranslationOption(false)
- ,m_currentWeightSetting("default")
- ,m_requireSortingAfterSourceContext(false)
- ,m_treeStructure(NULL)
+ : m_sourceStartPosMattersForRecombination(false)
+ , m_requireSortingAfterSourceContext(false)
+ , m_inputType(SentenceInput)
+ , m_onlyDistinctNBest(false)
+ , m_needAlignmentInfo(false)
+ , m_lmEnableOOVFeature(false)
+ , m_isAlwaysCreateDirectTranslationOption(false)
+ , m_currentWeightSetting("default")
+ , m_treeStructure(NULL)
{
m_xmlBrackets.first="<";
m_xmlBrackets.second=">";
@@ -112,7 +112,7 @@ bool StaticData::LoadData(Parameter *parameter)
// to cube or not to cube
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
- if (IsChart())
+ if (IsSyntax())
LoadChartDecodingParameters();
// input type has to be specified BEFORE loading the phrase tables!
@@ -698,7 +698,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
- if (IsChart()) {
+ if (IsSyntax()) {
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
@@ -765,7 +765,7 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
- if (IsChart()) {
+ if (IsSyntax()) {
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 193f79aad..d9a96aaa3 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -436,8 +436,13 @@ public:
SearchAlgorithm GetSearchAlgorithm() const {
return m_searchAlgorithm;
}
- bool IsChart() const {
- return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental;
+ bool IsSyntax() const {
+ return m_searchAlgorithm == CYKPlus ||
+ m_searchAlgorithm == ChartIncremental ||
+ m_searchAlgorithm == SyntaxS2T ||
+ m_searchAlgorithm == SyntaxT2S ||
+ m_searchAlgorithm == SyntaxT2S_SCFG ||
+ m_searchAlgorithm == SyntaxF2S;
}
const ScoreComponentCollection& GetAllWeights() const {
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index b5a50fc32..a5effef88 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -1,3 +1,4 @@
+// -*- c++ -*-
// $Id$
/***********************************************************************
@@ -74,6 +75,16 @@ protected:
_ScoreCacheMap m_lexReorderingScores;
public:
+ struct Better
+ {
+ bool operator()(TranslationOption const& a, TranslationOption const& b) const
+ { return a.GetFutureScore() > b.GetFutureScore(); }
+
+ bool operator()(TranslationOption const* a, TranslationOption const* b) const
+ { return a->GetFutureScore() > b->GetFutureScore(); }
+ };
+
+
explicit TranslationOption(); // For initial hypo that does translate nothing
/** constructor. Used by initial translation step */
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index 212b346d0..aa65cb320 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -40,723 +40,660 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/InputFeature.h"
#include "util/exception.hh"
+#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
-/** helper for pruning */
-bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
-{
- return a->GetFutureScore() > b->GetFutureScore();
-}
-
-/** constructor; since translation options are indexed by coverage span, the corresponding data structure is initialized here
- * This fn should be called by inherited classes
-*/
-TranslationOptionCollection::TranslationOptionCollection(
- InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
- : m_source(src)
- ,m_futureScore(src.GetSize())
- ,m_maxNoTransOptPerCoverage(maxNoTransOptPerCoverage)
- ,m_translationOptionThreshold(translationOptionThreshold)
-{
- // create 2-d vector
- size_t size = src.GetSize();
- for (size_t startPos = 0 ; startPos < size ; ++startPos) {
- m_collection.push_back( vector< TranslationOptionList >() );
-
- size_t maxSize = size - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
+ /** helper for pruning */
+ // bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
+ // {
+ // return a->GetFutureScore() > b->GetFutureScore();
+ // }
+
+ /** constructor; since translation options are indexed by coverage span, the
+ * corresponding data structure is initialized here. This fn should be
+ * called by inherited classes */
+ TranslationOptionCollection::
+ TranslationOptionCollection(InputType const& src,
+ size_t maxNoTransOptPerCoverage,
+ float translationOptionThreshold)
+ : m_source(src)
+ , m_futureScore(src.GetSize())
+ , m_maxNoTransOptPerCoverage(maxNoTransOptPerCoverage)
+ , m_translationOptionThreshold(translationOptionThreshold)
+ {
+ // create 2-d vector
+ size_t size = src.GetSize();
+ for (size_t sPos = 0 ; sPos < size ; ++sPos) {
+ m_collection.push_back( vector< TranslationOptionList >() );
+
+ size_t maxSize = size - sPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
- for (size_t endPos = 0 ; endPos < maxSize ; ++endPos) {
- m_collection[startPos].push_back( TranslationOptionList() );
+ for (size_t ePos = 0 ; ePos < maxSize ; ++ePos) {
+ m_collection[sPos].push_back( TranslationOptionList() );
+ }
}
}
-}
-/** destructor, clears out data structures */
-TranslationOptionCollection::~TranslationOptionCollection()
-{
- RemoveAllInColl(m_inputPathQueue);
-}
+ /** destructor, clears out data structures */
+ TranslationOptionCollection::
+ ~TranslationOptionCollection()
+ {
+ RemoveAllInColl(m_inputPathQueue);
+ }
-void TranslationOptionCollection::Prune()
-{
- // quit, if max size, threshold
- if (m_maxNoTransOptPerCoverage == 0 && m_translationOptionThreshold == -std::numeric_limits<float>::infinity())
- return;
-
- // bookkeeping for how many options used, pruned
- size_t total = 0;
- size_t totalPruned = 0;
-
- // loop through all spans
- size_t size = m_source.GetSize();
- for (size_t startPos = 0 ; startPos < size; ++startPos) {
- size_t maxSize = size - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- // consider list for a span
- TranslationOptionList &fullList = GetTranslationOptionList(startPos, endPos);
- total += fullList.size();
-
- // size pruning
- if (m_maxNoTransOptPerCoverage > 0 &&
- fullList.size() > m_maxNoTransOptPerCoverage) {
- // sort in vector
- NTH_ELEMENT4(fullList.begin(), fullList.begin() + m_maxNoTransOptPerCoverage, fullList.end(), CompareTranslationOption);
- totalPruned += fullList.size() - m_maxNoTransOptPerCoverage;
-
- // delete the rest
- for (size_t i = m_maxNoTransOptPerCoverage ; i < fullList.size() ; ++i) {
- delete fullList.Get(i);
- }
- fullList.resize(m_maxNoTransOptPerCoverage);
+ void
+ TranslationOptionCollection::
+ Prune()
+ {
+ static float no_th = -std::numeric_limits<float>::infinity();
+
+ if (m_maxNoTransOptPerCoverage == 0 && m_translationOptionThreshold == no_th)
+ return;
+
+ // bookkeeping for how many options used, pruned
+ size_t total = 0;
+ size_t totalPruned = 0;
+
+ // loop through all spans
+ size_t size = m_source.GetSize();
+ for (size_t sPos = 0 ; sPos < size; ++sPos)
+ {
+ BOOST_FOREACH(TranslationOptionList& fullList, m_collection[sPos])
+ {
+ total += fullList.size();
+ totalPruned += fullList.SelectNBest(m_maxNoTransOptPerCoverage);
+ totalPruned += fullList.PruneByThreshold(m_translationOptionThreshold);
+ }
}
+
+ VERBOSE(2," Total translation options: " << total << std::endl
+ << "Total translation options pruned: " << totalPruned << std::endl);
+ }
- // threshold pruning
- if (fullList.size() > 1 && m_translationOptionThreshold != -std::numeric_limits<float>::infinity()) {
- // first, find the best score
- float bestScore = -std::numeric_limits<float>::infinity();
- for (size_t i=0; i < fullList.size() ; ++i) {
- if (fullList.Get(i)->GetFutureScore() > bestScore)
- bestScore = fullList.Get(i)->GetFutureScore();
- }
- //std::cerr << "best score for span " << startPos << "-" << endPos << " is " << bestScore << "\n";
- // then, remove items that are worse than best score + threshold
- for (size_t i=0; i < fullList.size() ; ++i) {
- if (fullList.Get(i)->GetFutureScore() < bestScore + m_translationOptionThreshold) {
- //std::cerr << "\tremoving item " << i << ", score " << fullList.Get(i)->GetFutureScore() << ": " << fullList.Get(i)->GetTargetPhrase() << "\n";
- delete fullList.Get(i);
- fullList.Remove(i);
- total--;
- totalPruned++;
- i--;
- }
- //else
- //{
- // std::cerr << "\tkeeping item " << i << ", score " << fullList.Get(i)->GetFutureScore() << ": " << fullList.Get(i)->GetTargetPhrase() << "\n";
- //}
- }
- } // end of threshold pruning
- }
- } // end of loop through all spans
-
- VERBOSE(2," Total translation options: " << total << std::endl
- << "Total translation options pruned: " << totalPruned << std::endl);
-}
-
-/** Force a creation of a translation option where there are none for a particular source position.
-* ie. where a source word has not been translated, create a translation option by
-* 1. not observing the table limits on phrase/generation tables
-* 2. using the handler ProcessUnknownWord()
-* Call this function once translation option collection has been filled with translation options
-*
-* This function calls for unknown words is complicated by the fact it must handle different input types.
-* The call stack is
-* Base::ProcessUnknownWord()
-* Inherited::ProcessUnknownWord(position)
-* Base::ProcessOneUnknownWord()
-*
-*/
-
-void TranslationOptionCollection::ProcessUnknownWord()
-{
- const vector<DecodeGraph*>& decodeGraphList = StaticData::Instance().GetDecodeGraphs();
- size_t size = m_source.GetSize();
- // try to translation for coverage with no trans by expanding table limit
- for (size_t graphInd = 0 ; graphInd < decodeGraphList.size() ; graphInd++) {
- const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
- for (size_t pos = 0 ; pos < size ; ++pos) {
- TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
- size_t numTransOpt = fullList.size();
- if (numTransOpt == 0) {
- CreateTranslationOptionsForRange(decodeGraph, pos, pos, false, graphInd);
+ /** Force a creation of a translation option where there are none for a
+ * particular source position. ie. where a source word has not been
+ * translated, create a translation option by
+ * 1. not observing the table limits on phrase/generation tables
+ * 2. using the handler ProcessUnknownWord()
+ * Call this function once translation option collection has been filled with
+ * translation options
+ *
+ * The handling of unknown words is complicated by the fact that it must
+ * handle different input types. The call stack is
+ * Base::ProcessUnknownWord()
+ * Inherited::ProcessUnknownWord(position)
+ * Base::ProcessOneUnknownWord()
+ *
+ */
+
+ void
+ TranslationOptionCollection::
+ ProcessUnknownWord()
+ {
+ const vector<DecodeGraph*>& decodeGraphList
+ = StaticData::Instance().GetDecodeGraphs();
+ size_t size = m_source.GetSize();
+ // try to create translations for positions with none by ignoring the table limit
+ for (size_t graphInd = 0 ; graphInd < decodeGraphList.size() ; graphInd++) {
+ const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
+ for (size_t pos = 0 ; pos < size ; ++pos) {
+ TranslationOptionList* fullList = GetTranslationOptionList(pos, pos);
+ // size_t numTransOpt = fullList.size();
+ if (!fullList || fullList->size() == 0) {
+ CreateTranslationOptionsForRange(decodeGraph, pos, pos, false, graphInd);
+ }
}
}
- }
- bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
- // create unknown words for 1 word coverage where we don't have any trans options
- for (size_t pos = 0 ; pos < size ; ++pos) {
- TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
- if (fullList.size() == 0 || alwaysCreateDirectTranslationOption)
- ProcessUnknownWord(pos);
- }
-}
-
-/** special handling of ONE unknown words. Either add temporarily add word to translation table,
- * or drop the translation.
- * This function should be called by the ProcessOneUnknownWord() in the inherited class
- * At the moment, this unknown word handler is a bit of a hack, if copies over each factor from source
- * to target word, or uses the 'UNK' factor.
- * Ideally, this function should be in a class which can be expanded upon, for example,
- * to create a morphologically aware handler.
- *
- * \param sourceWord the unknown word
- * \param sourcePos
- * \param length length covered by this word (may be > 1 for lattice input)
- * \param inputScores a set of scores associated with unknown word (input scores from latties/CNs)
- */
-void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPath,
- size_t sourcePos,
- size_t length,
- const ScorePair *inputScores)
-{
- const StaticData &staticData = StaticData::Instance();
- const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
- float unknownScore = FloorScore(TransformScore(0));
- const Word &sourceWord = inputPath.GetPhrase().GetWord(0);
-
- // hack. Once the OOV FF is a phrase table, get rid of this
- PhraseDictionary *firstPt = NULL;
- if (PhraseDictionary::GetColl().size() == 0) {
- firstPt = PhraseDictionary::GetColl()[0];
+ bool alwaysCreateDirectTranslationOption
+ = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
+ // create unknown words for 1 word coverage where we don't have any trans options
+ for (size_t pos = 0 ; pos < size ; ++pos) {
+ TranslationOptionList* fullList = GetTranslationOptionList(pos, pos);
+ if (!fullList || fullList->size() == 0 || alwaysCreateDirectTranslationOption)
+ ProcessUnknownWord(pos);
+ }
}
- // unknown word, add as trans opt
- FactorCollection &factorCollection = FactorCollection::Instance();
-
- size_t isDigit = 0;
-
- const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
- const StringPiece s = f->GetString();
- bool isEpsilon = (s=="" || s==EPSILON);
- if (StaticData::Instance().GetDropUnknown()) {
-
-
- isDigit = s.find_first_of("0123456789");
- if (isDigit == string::npos)
- isDigit = 0;
- else
- isDigit = 1;
- // modify the starting bitmap
- }
+ /** special handling of ONE unknown word. Either temporarily add the word to
+ * the translation table, or drop the translation. This function should be
+ * called by the ProcessOneUnknownWord() in the inherited class. At the
+ * moment, this unknown word handler is a bit of a hack: it copies over
+ * each factor from source to target word, or uses the 'UNK' factor.
+ * Ideally, this function should be in a class which can be expanded
+ * upon, for example, to create a morphologically aware handler.
+ *
+ * \param sourceWord the unknown word
+ * \param sourcePos
+ * \param length length covered by this word (may be > 1 for lattice input)
+ * \param inputScores a set of scores associated with unknown word (input scores from lattices/CNs)
+ */
+ void
+ TranslationOptionCollection::
+ ProcessOneUnknownWord(const InputPath &inputPath, size_t sourcePos,
+ size_t length, const ScorePair *inputScores)
+ {
+ const StaticData &staticData = StaticData::Instance();
+ const UnknownWordPenaltyProducer&
+ unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance();
+ float unknownScore = FloorScore(TransformScore(0));
+ const Word &sourceWord = inputPath.GetPhrase().GetWord(0);
+
+ // hack. Once the OOV FF is a phrase table, get rid of this
+ PhraseDictionary *firstPt = NULL;
+ if (PhraseDictionary::GetColl().size() == 0) {
+ firstPt = PhraseDictionary::GetColl()[0];
+ }
- TargetPhrase targetPhrase(firstPt);
+ // unknown word, add as trans opt
+ FactorCollection &factorCollection = FactorCollection::Instance();
- if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
- // add to dictionary
+ size_t isDigit = 0;
- Word &targetWord = targetPhrase.AddWord();
- targetWord.SetIsOOV(true);
+ const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
+ const StringPiece s = f->GetString();
+ bool isEpsilon = (s=="" || s==EPSILON);
+ if (StaticData::Instance().GetDropUnknown()) {
- for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
- FactorType factorType = static_cast<FactorType>(currFactor);
- const Factor *sourceFactor = sourceWord[currFactor];
- if (sourceFactor == NULL)
- targetWord[factorType] = factorCollection.AddFactor(UNKNOWN_FACTOR);
+ isDigit = s.find_first_of("0123456789");
+ if (isDigit == string::npos)
+ isDigit = 0;
else
- targetWord[factorType] = factorCollection.AddFactor(sourceFactor->GetString());
+ isDigit = 1;
+ // modify the starting bitmap
}
- //create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
- targetPhrase.SetAlignmentInfo("0-0");
+ TargetPhrase targetPhrase(firstPt);
- } else {
- // drop source word. create blank trans opt
+ if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
+ // add to dictionary
- //targetPhrase.SetAlignment();
+ Word &targetWord = targetPhrase.AddWord();
+ targetWord.SetIsOOV(true);
- }
-
- targetPhrase.GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
+ for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
+ FactorType factorType = static_cast<FactorType>(currFactor);
- // source phrase
- const Phrase &sourcePhrase = inputPath.GetPhrase();
- m_unksrcs.push_back(&sourcePhrase);
- WordsRange range(sourcePos, sourcePos + length - 1);
+ const Factor *sourceFactor = sourceWord[currFactor];
+ if (sourceFactor == NULL)
+ targetWord[factorType] = factorCollection.AddFactor(UNKNOWN_FACTOR);
+ else
+ targetWord[factorType] = factorCollection.AddFactor(sourceFactor->GetString());
+ }
+ //create a one-to-one alignment between UNKNOWN_FACTOR and its verbatim translation
- targetPhrase.EvaluateInIsolation(sourcePhrase);
+ targetPhrase.SetAlignmentInfo("0-0");
+
+ }
+
+ targetPhrase.GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
- TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
- transOpt->SetInputPath(inputPath);
- Add(transOpt);
+ // source phrase
+ const Phrase &sourcePhrase = inputPath.GetPhrase();
+ m_unksrcs.push_back(&sourcePhrase);
+ WordsRange range(sourcePos, sourcePos + length - 1);
+ targetPhrase.EvaluateInIsolation(sourcePhrase);
-}
+ TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
+ transOpt->SetInputPath(inputPath);
+ Add(transOpt);
-/** compute future score matrix in a dynamic programming fashion.
- * This matrix used in search.
- * Call this function once translation option collection has been filled with translation options
-*/
-void TranslationOptionCollection::CalcFutureScore()
-{
- // setup the matrix (ignore lower triangle, set upper triangle to -inf
- size_t size = m_source.GetSize(); // the width of the matrix
- for(size_t row=0; row<size; row++) {
- for(size_t col=row; col<size; col++) {
- m_futureScore.SetScore(row, col, -numeric_limits<float>::infinity());
- }
}
- // walk all the translation options and record the cheapest option for each span
- for (size_t startPos = 0 ; startPos < size ; ++startPos) {
- size_t maxSize = m_source.GetSize() - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
-
- TranslationOptionList::const_iterator iterTransOpt;
- for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) {
- const TranslationOption &transOpt = **iterTransOpt;
- float score = transOpt.GetFutureScore();
- if (score > m_futureScore.GetScore(startPos, endPos))
- m_futureScore.SetScore(startPos, endPos, score);
+ /** compute future score matrix in a dynamic programming fashion.
+ * This matrix is used in search.
+ * Call this function once translation option collection has been filled with translation options
+ */
+ void
+ TranslationOptionCollection::
+ CalcFutureScore()
+ {
+ // setup the matrix (ignore lower triangle, set upper triangle to -inf)
+ size_t size = m_source.GetSize(); // the width of the matrix
+
+ for(size_t row=0; row < size; row++) {
+ for(size_t col=row; col<size; col++) {
+ m_futureScore.SetScore(row, col, -numeric_limits<float>::infinity());
}
}
- }
- // now fill all the cells in the strictly upper triangle
- // there is no way to modify the diagonal now, in the case
- // where no translation option covers a single-word span,
- // we leave the +inf in the matrix
- // like in chart parsing we want each cell to contain the highest score
- // of the full-span trOpt or the sum of scores of joining two smaller spans
-
- for(size_t colstart = 1; colstart < size ; colstart++) {
- for(size_t diagshift = 0; diagshift < size-colstart ; diagshift++) {
- size_t startPos = diagshift;
- size_t endPos = colstart+diagshift;
- for(size_t joinAt = startPos; joinAt < endPos ; joinAt++) {
- float joinedScore = m_futureScore.GetScore(startPos, joinAt)
- + m_futureScore.GetScore(joinAt+1, endPos);
- /* // uncomment to see the cell filling scheme
- TRACE_ERR( "[" <<startPos<<","<<endPos<<"] <-? ["<<startPos<<","<<joinAt<<"]+["<<joinAt+1<<","<<endPos
- << "] (colstart: "<<colstart<<", diagshift: "<<diagshift<<")"<<endl);
- */
- if (joinedScore > m_futureScore.GetScore(startPos, endPos))
- m_futureScore.SetScore(startPos, endPos, joinedScore);
+ // walk all the translation options and record the cheapest option for each span
+ for (size_t sPos = 0 ; sPos < size ; ++sPos)
+ {
+ size_t ePos = sPos;
+ BOOST_FOREACH(TranslationOptionList& tol, m_collection[sPos])
+ {
+ TranslationOptionList::const_iterator toi;
+ for(toi = tol.begin() ; toi != tol.end() ; ++toi) {
+ const TranslationOption& to = **toi;
+ float score = to.GetFutureScore();
+ if (score > m_futureScore.GetScore(sPos, ePos))
+ m_futureScore.SetScore(sPos, ePos, score);
+ }
+ ++ePos;
+ }
}
- }
- }
-
- IFVERBOSE(3) {
- int total = 0;
- for(size_t row=0; row<size; row++) {
- size_t maxSize = size - row;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
- for(size_t col=row; col<row+maxSize; col++) {
- int count = GetTranslationOptionList(row, col).size();
- TRACE_ERR( "translation options spanning from "
- << row <<" to "<< col <<" is "
- << count <<endl);
- total += count;
+ // now fill all the cells in the strictly upper triangle
+ // there is no way to modify the diagonal now, in the case
+ // where no translation option covers a single-word span,
+ // we leave the -inf in the matrix
+ // like in chart parsing we want each cell to contain the highest score
+ // of the full-span trOpt or the sum of scores of joining two smaller spans
+
+ for(size_t colstart = 1; colstart < size ; colstart++) {
+ for(size_t diagshift = 0; diagshift < size-colstart ; diagshift++) {
+ size_t sPos = diagshift;
+ size_t ePos = colstart+diagshift;
+ for(size_t joinAt = sPos; joinAt < ePos ; joinAt++) {
+ float joinedScore = m_futureScore.GetScore(sPos, joinAt)
+ + m_futureScore.GetScore(joinAt+1, ePos);
+ // uncomment to see the cell filling scheme
+ // TRACE_ERR("[" << sPos << "," << ePos << "] <-? ["
+ // << sPos << "," << joinAt << "]+["
+ // << joinAt+1 << "," << ePos << "] (colstart: "
+ // << colstart << ", diagshift: " << diagshift << ")"
+ // << endl);
+
+ if (joinedScore > m_futureScore.GetScore(sPos, ePos))
+ m_futureScore.SetScore(sPos, ePos, joinedScore);
+ }
}
}
- TRACE_ERR( "translation options generated in total: "<< total << endl);
-
- for(size_t row=0; row<size; row++)
- for(size_t col=row; col<size; col++)
- TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "<< m_futureScore.GetScore(row, col) <<endl);
+
+ IFVERBOSE(3)
+ {
+ int total = 0;
+ for(size_t row = 0; row < size; row++)
+ {
+ size_t col = row;
+ BOOST_FOREACH(TranslationOptionList& tol, m_collection[row])
+ {
+ // size_t maxSize = size - row;
+ // size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ // maxSize = std::min(maxSize, maxSizePhrase);
+
+ // for(size_t col=row; col<row+maxSize; col++) {
+ int count = tol.size();
+ TRACE_ERR( "translation options spanning from "
+ << row <<" to "<< col <<" is "
+ << count <<endl);
+ total += count;
+ ++col;
+ }
+ }
+ TRACE_ERR( "translation options generated in total: "<< total << endl);
+
+ for(size_t row=0; row<size; row++)
+ for(size_t col=row; col<size; col++)
+ TRACE_ERR( "future cost from "<< row <<" to "<< col <<" is "
+ << m_futureScore.GetScore(row, col) <<endl);
+ }
}
-}
-
-
-
-/** Create all possible translations from the phrase tables
- * for a particular input sentence. This implies applying all
- * translation and generation steps. Also computes future cost matrix.
- */
-void TranslationOptionCollection::CreateTranslationOptions()
-{
- // loop over all substrings of the source sentence, look them up
- // in the phraseDictionary (which is the- possibly filtered-- phrase
- // table loaded on initialization), generate TranslationOption objects
- // for all phrases
- // there may be multiple decoding graphs (factorizations of decoding)
- const vector <DecodeGraph*> &decodeGraphList = StaticData::Instance().GetDecodeGraphs();
- // length of the sentence
- const size_t size = m_source.GetSize();
- // loop over all decoding graphs, each generates translation options
- for (size_t graphInd = 0 ; graphInd < decodeGraphList.size() ; graphInd++) {
- if (decodeGraphList.size() > 1) {
- VERBOSE(3,"Creating translation options from decoding graph " << graphInd << endl);
- }
-
- const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
- size_t backoff = decodeGraph.GetBackoff();
- // generate phrases that start at startPos ...
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() graphInd:" << graphInd << endl);
- for (size_t startPos = 0 ; startPos < size; startPos++) {
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() startPos:" << startPos << endl);
- size_t maxSize = size - startPos; // don't go over end of sentence
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- // ... and that end at endPos
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; endPos++) {
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() endPos:" << endPos << endl);
- if (graphInd > 0 && // only skip subsequent graphs
- backoff != 0 && // use of backoff specified
- (endPos-startPos+1 > backoff || // size exceeds backoff limit or ...
- m_collection[startPos][endPos-startPos].size() > 0)) { // no phrases found so far
- VERBOSE(3,"No backoff to graph " << graphInd << " for span [" << startPos << ";" << endPos << "]" << endl);
- // do not create more options
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() continue:" << endl);
- continue;
- }
-
- // create translation options for that range
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() before CreateTranslationOptionsForRange" << endl);
- CreateTranslationOptionsForRange( decodeGraph, startPos, endPos, true, graphInd);
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptions() after CreateTranslationOptionsForRange" << endl);
+ /** Create all possible translations from the phrase tables
+ * for a particular input sentence. This implies applying all
+ * translation and generation steps. Also computes future cost matrix.
+ */
+ void
+ TranslationOptionCollection::
+ CreateTranslationOptions()
+ {
+ // loop over all substrings of the source sentence, look them up
+ // in the phraseDictionary (which is the -- possibly filtered -- phrase
+ // table loaded on initialization), generate TranslationOption objects
+ // for all phrases
+
+ // there may be multiple decoding graphs (factorizations of decoding)
+ const vector <DecodeGraph*> &decodeGraphList
+ = StaticData::Instance().GetDecodeGraphs();
+
+ // length of the sentence
+ const size_t size = m_source.GetSize();
+
+ // loop over all decoding graphs, each generates translation options
+ for (size_t gidx = 0 ; gidx < decodeGraphList.size() ; gidx++)
+ {
+ if (decodeGraphList.size() > 1)
+ VERBOSE(3,"Creating translation options from decoding graph " << gidx << endl);
+
+ const DecodeGraph& dg = *decodeGraphList[gidx];
+ size_t backoff = dg.GetBackoff();
+ // iterate over spans
+ for (size_t sPos = 0 ; sPos < size; sPos++)
+ {
+ size_t maxSize = size - sPos; // don't go over end of sentence
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = std::min(maxSize, maxSizePhrase);
+
+ for (size_t ePos = sPos ; ePos < sPos + maxSize ; ePos++)
+ {
+ if (gidx && backoff &&
+ (ePos-sPos+1 <= backoff || // size exceeds backoff limit (HUH? UG) or ...
+ m_collection[sPos][ePos-sPos].size() > 0))
+ {
+ VERBOSE(3,"No backoff to graph " << gidx << " for span [" << sPos << ";" << ePos << "]" << endl);
+ continue;
+ }
+ CreateTranslationOptionsForRange(dg, sPos, ePos, true, gidx);
+ }
+ }
}
- }
+ VERBOSE(3,"Translation Option Collection\n " << *this << endl);
+ ProcessUnknownWord();
+ EvaluateWithSourceContext();
+ Prune();
+ Sort();
+ CalcFutureScore(); // future score matrix
+ CacheLexReordering(); // Cached lex reodering costs
}
- VERBOSE(3,"Translation Option Collection\n " << *this << endl);
-
- ProcessUnknownWord();
- EvaluateWithSourceContext();
-
- // Prune
- Prune();
-
- Sort();
-
- // future score matrix
- CalcFutureScore();
-
- // Cached lex reodering costs
- CacheLexReordering();
-}
-
-void TranslationOptionCollection::CreateTranslationOptionsForRange(
- const DecodeGraph &decodeGraph
- , size_t startPos
- , size_t endPos
- , bool adhereTableLimit
- , size_t graphInd
- , InputPath &inputPath)
-{
-//VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() START startPos:" << startPos << " endPos:" << endPos << endl);
- if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
-
- // partial trans opt stored in here
- PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
- size_t totalEarlyPruned = 0;
-
- // initial translation step
- list <const DecodeStep* >::const_iterator iterStep = decodeGraph.begin();
- const DecodeStep &decodeStep = **iterStep;
-
- const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();
- const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(phraseDictionary);
-
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before ProcessInitialTranslation" << endl);
- static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
- (m_source, *oldPtoc
- , startPos, endPos, adhereTableLimit
- , inputPath, targetPhrases);
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after ProcessInitialTranslation" << endl);
-
- SetInputScore(inputPath, *oldPtoc);
-
- // do rest of decode steps
- int indexStep = 0;
-
- for (++iterStep ; iterStep != decodeGraph.end() ; ++iterStep) {
-
- const DecodeStep *decodeStep = *iterStep;
- PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
-
- // go thru each intermediate trans opt just created
- const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
- vector<TranslationOption*>::const_iterator iterPartialTranslOpt;
- for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt) {
- TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
-
- if (const DecodeStepTranslation *translateStep = dynamic_cast<const DecodeStepTranslation*>(decodeStep) ) {
- const PhraseDictionary &phraseDictionary = *translateStep->GetPhraseDictionaryFeature();
- const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(phraseDictionary);
- translateStep->Process(inputPartialTranslOpt
- , *decodeStep
- , *newPtoc
- , this
- , adhereTableLimit
- , targetPhrases);
- } else {
- const DecodeStepGeneration *genStep = dynamic_cast<const DecodeStepGeneration*>(decodeStep);
- assert(genStep);
- genStep->Process(inputPartialTranslOpt
- , *decodeStep
- , *newPtoc
- , this
- , adhereTableLimit);
- }
+ bool
+ TranslationOptionCollection::
+ CreateTranslationOptionsForRange
+ (const DecodeGraph& dgraph, size_t sPos, size_t ePos,
+ bool adhereTableLimit, size_t gidx, InputPath &inputPath)
+ {
+ typedef DecodeStepTranslation Tstep;
+ typedef DecodeStepGeneration Gstep;
+ if ((StaticData::Instance().GetXmlInputType() != XmlExclusive)
+ || !HasXmlOptionsOverlappingRange(sPos,ePos))
+ {
+
+ // partial trans opt stored in here
+ PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
+ size_t totalEarlyPruned = 0;
+
+ // initial translation step
+ list <const DecodeStep* >::const_iterator d = dgraph.begin();
+ const DecodeStep &dstep = **d;
+
+ const PhraseDictionary &pdict = *dstep.GetPhraseDictionaryFeature();
+ const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
+
+ static_cast<const Tstep&>(dstep).ProcessInitialTranslation
+ (m_source, *oldPtoc, sPos, ePos, adhereTableLimit, inputPath, targetPhrases);
+
+ SetInputScore(inputPath, *oldPtoc);
+
+ // do rest of decode steps
+ int indexStep = 0;
+
+ for (++d ; d != dgraph.end() ; ++d)
+ {
+ const DecodeStep *dstep = *d;
+ PartialTranslOptColl* newPtoc = new PartialTranslOptColl;
+
+ // go thru each intermediate trans opt just created
+ const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList();
+ vector<TranslationOption*>::const_iterator pto;
+ for (pto = partTransOptList.begin() ; pto != partTransOptList.end() ; ++pto)
+ {
+ TranslationOption &inputPartialTranslOpt = **pto;
+ if (const Tstep *tstep = dynamic_cast<const Tstep*>(dstep))
+ {
+ const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
+ const TargetPhraseCollection *targetPhrases = inputPath.GetTargetPhrases(pdict);
+ tstep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
+ this, adhereTableLimit, targetPhrases);
+ }
+ else
+ {
+ const Gstep *genStep = dynamic_cast<const Gstep*>(dstep);
+ UTIL_THROW_IF2(!genStep, "Decode steps must be either "
+ << "Translation or Generation Steps!");
+ genStep->Process(inputPartialTranslOpt, *dstep, *newPtoc,
+ this, adhereTableLimit);
+ }
+ }
+
+ // last but 1 partial trans not required anymore
+ totalEarlyPruned += newPtoc->GetPrunedCount();
+ delete oldPtoc;
+ oldPtoc = newPtoc;
+
+ indexStep++;
+ } // for (++d
+
+ // add to fully formed translation option list
+ PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
+ const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
+ vector<TranslationOption*>::const_iterator c;
+ for (c = partTransOptList.begin() ; c != partTransOptList.end() ; ++c)
+ {
+ TranslationOption *transOpt = *c;
+ if (StaticData::Instance().GetXmlInputType() != XmlConstraint
+ || !ViolatesXmlOptionsConstraint(sPos,ePos,transOpt))
+ {
+ Add(transOpt);
+ }
+ }
+ lastPartialTranslOptColl.DetachAll();
+ totalEarlyPruned += oldPtoc->GetPrunedCount();
+ delete oldPtoc;
+ // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
+ } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos))
+
+ if (gidx == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough
+ && HasXmlOptionsOverlappingRange(sPos,ePos))
+ {
+ CreateXmlOptionsForRange(sPos, ePos);
}
- // last but 1 partial trans not required anymore
- totalEarlyPruned += newPtoc->GetPrunedCount();
- delete oldPtoc;
- oldPtoc = newPtoc;
-
- indexStep++;
- } // for (++iterStep
-
- // add to fully formed translation option list
- PartialTranslOptColl &lastPartialTranslOptColl = *oldPtoc;
- const vector<TranslationOption*>& partTransOptList = lastPartialTranslOptColl.GetList();
- vector<TranslationOption*>::const_iterator iterColl;
- for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl) {
- TranslationOption *transOpt = *iterColl;
- if (StaticData::Instance().GetXmlInputType() != XmlConstraint || !ViolatesXmlOptionsConstraint(startPos,endPos,transOpt)) {
- Add(transOpt);
- }
- }
-
- lastPartialTranslOptColl.DetachAll();
- totalEarlyPruned += oldPtoc->GetPrunedCount();
- delete oldPtoc;
- // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
- } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
-
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() before CreateXmlOptionsForRange" << endl);
- if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
- CreateXmlOptionsForRange(startPos, endPos);
+ return true;
}
-// VERBOSE(1,"TranslationOptionCollection::CreateTranslationOptionsForRange() after CreateXmlOptionsForRange" << endl);
-}
-void TranslationOptionCollection::SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc)
-{
- const ScorePair *inputScore = inputPath.GetInputScore();
- if (inputScore == NULL) {
- return;
+ /** Add the input score of an input path to every partial translation option.
+  *  Does nothing when the path carries no input score.
+  *  \param inputPath path whose input score (if any) is applied
+  *  \param oldPtoc   partial translation options whose score breakdowns are updated
+  */
+ void
+ TranslationOptionCollection::
+ SetInputScore(const InputPath &inputPath, PartialTranslOptColl &oldPtoc)
+ {
+   const ScorePair* pathScore = inputPath.GetInputScore();
+   if (pathScore != NULL)
+   {
+     const InputFeature &feature = InputFeature::Instance();
+     // credit the path's score to the InputFeature slot of each option
+     BOOST_FOREACH(TranslationOption* to, oldPtoc.GetList())
+       to->GetScoreBreakdown().PlusEquals(&feature, *pathScore);
+   }
}
- const InputFeature &inputFeature = InputFeature::Instance();
-
- const std::vector<TranslationOption*> &transOpts = oldPtoc.GetList();
- for (size_t i = 0; i < transOpts.size(); ++i) {
- TranslationOption &transOpt = *transOpts[i];
-
- ScoreComponentCollection &scores = transOpt.GetScoreBreakdown();
- scores.PlusEquals(&inputFeature, *inputScore);
-
+ /** Call EvaluateWithSourceContext(m_source) on every translation option
+  *  stored for every start position of the source.
+  *  NOTE(review): the version this replaces also invoked
+  *  EvaluateTranslatonOptionListWithSourceContext() once per option list;
+  *  confirm that dropping that call is intended.
+  */
+ void
+ TranslationOptionCollection::
+ EvaluateWithSourceContext()
+ {
+   const size_t numStartPos = m_source.GetSize();
+   for (size_t sPos = 0; sPos < numStartPos; ++sPos)
+   {
+     BOOST_FOREACH(TranslationOptionList& tol, m_collection[sPos])
+     {
+       BOOST_FOREACH(TranslationOption* to, tol)
+         to->EvaluateWithSourceContext(m_source);
+     }
+   }
}
-}
-
-void TranslationOptionCollection::EvaluateWithSourceContext()
-{
- const size_t size = m_source.GetSize();
-
- for (size_t startPos = 0 ; startPos < size ; ++startPos) {
- size_t maxSize = m_source.GetSize() - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
-
- TranslationOptionList::const_iterator iterTransOpt;
- for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) {
- TranslationOption &transOpt = **iterTransOpt;
- transOpt.EvaluateWithSourceContext(m_source);
+
+ /** Sort every translation option list with TranslationOption::Better,
+  *  one start position at a time.
+  */
+ void
+ TranslationOptionCollection::
+ Sort()
+ {
+   static TranslationOption::Better cmp;
+   size_t const numStartPos = m_source.GetSize();
+   for (size_t sPos = 0; sPos != numStartPos; ++sPos)
+   {
+     // .at() keeps the range check on the start position
+     vector<TranslationOptionList>& lists = m_collection.at(sPos);
+     for (size_t k = 0; k < lists.size(); ++k)
+       std::sort(lists[k].begin(), lists[k].end(), cmp);
}
-
- EvaluateTranslatonOptionListWithSourceContext(transOptList);
- }
}
-}
-
-void TranslationOptionCollection::EvaluateTranslatonOptionListWithSourceContext(
- TranslationOptionList &translationOptionList)
-{
-
- const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
- const StaticData &staticData = StaticData::Instance();
- for (size_t i = 0; i < ffs.size(); ++i) {
- const FeatureFunction &ff = *ffs[i];
- if (! staticData.IsFeatureFunctionIgnored(ff)) {
- ff.EvaluateTranslationOptionListWithSourceContext(m_source, translationOptionList);
- }
+
+ /** Check whether a range overlaps with any XML option. An exact match is
+ * not required; any overlap counts.
+ * This base-class implementation does not support XML options: it ignores
+ * both arguments and always returns false. Subclasses that support XML
+ * options need to override this function.
+ * Called by CreateTranslationOptionsForRange().
+ * The two (unnamed) size_t arguments are the first and the last position of
+ * the range in the input sentence.
+ * \return always false in this base implementation
+ */
+ bool
+ TranslationOptionCollection::
+ HasXmlOptionsOverlappingRange(size_t, size_t) const
+ { return false; }
+
+ /** Check whether a translation option conflicts with any constraint XML
+ * option. It is okay if the XML option is a substring in source and target.
+ * This base-class implementation does not support XML options: it ignores
+ * all arguments and always returns false. Subclasses that support XML
+ * options need to override this function.
+ * Called by CreateTranslationOptionsForRange().
+ * The two (unnamed) size_t arguments are the first and the last position of
+ * the range in the input sentence; the third argument is the translation
+ * option to check.
+ * \return always false in this base implementation
+ */
+ bool
+ TranslationOptionCollection::
+ ViolatesXmlOptionsConstraint(size_t, size_t, TranslationOption*) const
+ { return false; }
+
+ /** Populates the current collection with XML options exactly covering the
+ * range specified. This default implementation ignores both arguments and
+ * does nothing.
+ * Called by CreateTranslationOptionsForRange().
+ * The two (unnamed) size_t arguments are the first and the last position of
+ * the range in the input sentence.
+ */
+ void
+ TranslationOptionCollection::
+ CreateXmlOptionsForRange(size_t, size_t)
+ { }
+
+
+ /** Add a translation option to the list for its source span.
+  *  Options are stored per start position, indexed by (end - start); the
+  *  per-start vector is grown with empty lists when the slot does not exist yet.
+  *  Throws (via UTIL_THROW_IF2) if the span ends beyond the input.
+  *  \param translationOption translation option to be added */
+ void
+ TranslationOptionCollection::
+ Add(TranslationOption *translationOption)
+ {
+   const WordsRange &span = translationOption->GetSourceWordsRange();
+   size_t const first = span.GetStartPos();
+   size_t const last  = span.GetEndPos();
+
+   UTIL_THROW_IF2(last >= m_source.GetSize(),
+                  "Coverage exceeds input size:" << span << "\n"
+                  << "translationOption=" << *translationOption);
+
+   size_t const offset = last - first;
+   vector<TranslationOptionList>& bySpan = m_collection[first];
+   // make sure slot [offset] exists before filing the option there
+   for (size_t n = bySpan.size(); n <= offset; ++n)
+     bySpan.push_back(TranslationOptionList());
+   bySpan[offset].Add(translationOption);
}
-
-}
-
-void TranslationOptionCollection::Sort()
-{
- size_t size = m_source.GetSize();
- for (size_t startPos = 0 ; startPos < size; ++startPos) {
- size_t maxSize = size - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize; ++endPos) {
- TranslationOptionList &transOptList = GetTranslationOptionList(startPos, endPos);
- std::sort(transOptList.begin(), transOptList.end(), CompareTranslationOption);
- }
+
+ TO_STRING_BODY(TranslationOptionCollection);
+
+ /** Print every translation option of the collection, one per line.
+  *  For each start position, spans are visited by increasing end position
+  *  until GetTranslationOptionList() reports no list (NULL).
+  */
+ std::ostream&
+ operator<<(std::ostream& out, const TranslationOptionCollection& coll)
+ {
+   size_t const numStartPos = coll.m_source.GetSize();
+   for (size_t sPos = 0; sPos < numStartPos; ++sPos)
+   {
+     size_t ePos = sPos;
+     while (TranslationOptionList const* tol
+            = coll.GetTranslationOptionList(sPos, ePos))
+     {
+       BOOST_FOREACH(TranslationOption const* to, *tol)
+         out << *to << std::endl;
+       ++ePos;
+     }
+   }
+   return out;
}
-}
-
-/** Check if this range overlaps with any XML options. This doesn't need to be an exact match, only an overlap.
- * by default, we don't support XML options. subclasses need to override this function.
- * called by CreateTranslationOptionsForRange()
- * \param startPos first position in input sentence
- * \param lastPos last position in input sentence
- */
-bool TranslationOptionCollection::HasXmlOptionsOverlappingRange(size_t, size_t) const
-{
- return false;
- //not implemented for base class
-}
-
-/** Check if an option conflicts with any constraint XML options. Okay, if XML option is substring in source and target.
- * by default, we don't support XML options. subclasses need to override this function.
- * called by CreateTranslationOptionsForRange()
- * \param startPos first position in input sentence
- * \param lastPos last position in input sentence
- */
-bool TranslationOptionCollection::ViolatesXmlOptionsConstraint(size_t, size_t, TranslationOption *) const
-{
- return false;
- //not implemented for base class
-}
-
-/** Populates the current Collection with XML options exactly covering the range specified. Default implementation does nothing.
- * called by CreateTranslationOptionsForRange()
- * \param startPos first position in input sentence
- * \param lastPos last position in input sentence
- */
-void TranslationOptionCollection::CreateXmlOptionsForRange(size_t, size_t)
-{
- //not implemented for base class
-};
-
-
-/** Add translation option to the list
- * \param translationOption translation option to be added */
-void TranslationOptionCollection::Add(TranslationOption *translationOption)
-{
- const WordsRange &coverage = translationOption->GetSourceWordsRange();
-
- if (coverage.GetEndPos() - coverage.GetStartPos() >= m_collection[coverage.GetStartPos()].size()) {
- cerr << "translationOption=" << *translationOption << endl;
- cerr << "coverage=" << coverage << endl;
+
+ /** For every stateful feature function whose dynamic type is exactly
+  *  LexicalReordering, look up the reordering scores of each translation
+  *  option (input-path phrase vs. target phrase) and cache non-empty results
+  *  on the option.
+  */
+ void
+ TranslationOptionCollection::
+ CacheLexReordering()
+ {
+   typedef StatefulFeatureFunction sfFF;
+   typedef std::vector<const sfFF*> FFList;
+   FFList const& statefulFFs = sfFF::GetStatefulFeatureFunctions();
+   size_t const numStartPos = m_source.GetSize();
+
+   for (FFList::const_iterator f = statefulFFs.begin(); f != statefulFFs.end(); ++f)
+   {
+     // typeid comparison: subclasses of LexicalReordering do not qualify
+     if (typeid(**f) == typeid(LexicalReordering))
+     {
+       LexicalReordering const& lexReo = static_cast<const LexicalReordering&>(**f);
+       for (size_t sPos = 0; sPos < numStartPos; ++sPos)
+       {
+         BOOST_FOREACH(TranslationOptionList const& tol, m_collection[sPos])
+         {
+           BOOST_FOREACH(TranslationOption* to, tol)
+           {
+             Phrase const& src = to->GetInputPath().GetPhrase();
+             Phrase const& trg = to->GetTargetPhrase();
+             Scores probs = lexReo.GetProb(src, trg);
+             if (probs.empty()) continue; // nothing to cache for this option
+             to->CacheLexReorderingScores(lexReo, probs);
+           }
+         }
+       }
+     }
+   }
}
-
- UTIL_THROW_IF2(coverage.GetEndPos() - coverage.GetStartPos() >= m_collection[coverage.GetStartPos()].size(),
- "Out of bound access: " << coverage);
- m_collection[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()].Add(translationOption);
-}
-
-TO_STRING_BODY(TranslationOptionCollection);
-
-std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll)
-{
- size_t size = coll.m_source.GetSize();
- for (size_t startPos = 0 ; startPos < size ; ++startPos) {
- size_t maxSize = size - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) {
- const TranslationOptionList& fullList = coll.GetTranslationOptionList(startPos, endPos);
- size_t sizeFull = fullList.size();
- for (size_t i = 0; i < sizeFull; i++) {
- out << *fullList.Get(i) << std::endl;
- }
- }
+
+ //! List of trans opt for a particular span, or NULL if no list exists for
+ //! that span. Throws (via UTIL_THROW_IF2) if sPos is not a valid start position.
+ TranslationOptionList*
+ TranslationOptionCollection::
+ GetTranslationOptionList(size_t const sPos, size_t const ePos)
+ {
+   UTIL_THROW_IF2(sPos >= m_collection.size(), "Out of bound access.");
+   vector<TranslationOptionList>& spans = m_collection[sPos];
+   // unsigned arithmetic: ePos < sPos wraps to a huge value and yields NULL below
+   size_t const len = ePos - sPos;
+   if (len >= spans.size()) return NULL;
+   return &spans[len];
}
-
- //std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin();
- //size_t j = 0;
- //for (; i!=coll.m_collection.end(); ++i) {
- //out << "s[" << j++ << "].size=" << i->size() << std::endl;
- //}
-
- return out;
-}
-
-void TranslationOptionCollection::CacheLexReordering()
-{
- size_t size = m_source.GetSize();
-
- const std::vector<const StatefulFeatureFunction*> &ffs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- std::vector<const StatefulFeatureFunction*>::const_iterator iter;
- for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
- const StatefulFeatureFunction &ff = **iter;
- if (typeid(ff) == typeid(LexicalReordering)) {
- const LexicalReordering &lexreordering = static_cast<const LexicalReordering&>(ff);
- for (size_t startPos = 0 ; startPos < size ; startPos++) {
- size_t maxSize = size - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++) {
- TranslationOptionList &transOptList = GetTranslationOptionList( startPos, endPos);
- TranslationOptionList::iterator iterTransOpt;
- for(iterTransOpt = transOptList.begin() ; iterTransOpt != transOptList.end() ; ++iterTransOpt) {
- TranslationOption &transOpt = **iterTransOpt;
- //Phrase sourcePhrase = m_source.GetSubString(WordsRange(startPos,endPos));
- const Phrase &sourcePhrase = transOpt.GetInputPath().GetPhrase();
- Scores score = lexreordering.GetProb(sourcePhrase
- , transOpt.GetTargetPhrase());
- if (!score.empty())
- transOpt.CacheLexReorderingScores(lexreordering, score);
- } // for(iterTransOpt
- } // for (size_t endPos = startPos ; endPos < startPos + maxSize; endPos++) {
- } // for (size_t startPos = 0 ; startPos < size ; startPos++) {
- } // if (typeid(ff) == typeid(LexicalReordering)) {
- } // for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
-}
-
-//! list of trans opt for a particular span
-TranslationOptionList &TranslationOptionCollection::GetTranslationOptionList(size_t startPos, size_t endPos)
-{
- size_t maxSize = endPos - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- UTIL_THROW_IF2(maxSize >= m_collection[startPos].size(),
- "Out of bound access: " << maxSize);
-
- return m_collection[startPos][maxSize];
-}
-const TranslationOptionList &TranslationOptionCollection::GetTranslationOptionList(size_t startPos, size_t endPos) const
-{
- size_t maxSize = endPos - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = std::min(maxSize, maxSizePhrase);
-
- UTIL_THROW_IF2(maxSize >= m_collection[startPos].size(),
- "Out of bound access: " << maxSize);
- return m_collection[startPos][maxSize];
-}
-
-void TranslationOptionCollection::GetTargetPhraseCollectionBatch()
-{
- const vector <DecodeGraph*> &decodeGraphList = StaticData::Instance().GetDecodeGraphs();
- for (size_t graphInd = 0 ; graphInd < decodeGraphList.size() ; graphInd++) {
- const DecodeGraph &decodeGraph = *decodeGraphList[graphInd];
-
- list <const DecodeStep* >::const_iterator iterStep;
- for (iterStep = decodeGraph.begin(); iterStep != decodeGraph.end() ; ++iterStep) {
- const DecodeStep &decodeStep = **iterStep;
- const DecodeStepTranslation *transStep = dynamic_cast<const DecodeStepTranslation *>(&decodeStep);
- if (transStep) {
- const PhraseDictionary &phraseDictionary = *transStep->GetPhraseDictionaryFeature();
- phraseDictionary.GetTargetPhraseCollectionBatch(m_inputPathQueue);
+
+ //! Const access to the list of trans opt for a particular span, or NULL if
+ //! no list exists for that span. Throws (via UTIL_THROW_IF2) if sPos is not
+ //! a valid start position.
+ TranslationOptionList const*
+ TranslationOptionCollection::
+ GetTranslationOptionList(size_t sPos, size_t ePos) const
+ {
+   UTIL_THROW_IF2(sPos >= m_collection.size(), "Out of bound access.");
+   vector<TranslationOptionList> const& spans = m_collection[sPos];
+   // unsigned arithmetic: ePos < sPos wraps to a huge value and yields NULL below
+   size_t const len = ePos - sPos;
+   if (len >= spans.size()) return NULL;
+   return &spans[len];
+ }
+
+ /** Hand m_inputPathQueue to the phrase dictionary of every translation step
+  *  in every decode graph, via GetTargetPhraseCollectionBatch().
+  *  Decode steps that are not DecodeStepTranslation are skipped.
+  */
+ void
+ TranslationOptionCollection::
+ GetTargetPhraseCollectionBatch()
+ {
+   typedef DecodeStepTranslation Tstep;
+   typedef list <const DecodeStep* >::const_iterator StepIter;
+   const vector <DecodeGraph*> &graphs = StaticData::Instance().GetDecodeGraphs();
+   for (size_t g = 0; g < graphs.size(); ++g)
+   {
+     DecodeGraph const* graph = graphs[g];
+     for (StepIter step = graph->begin(); step != graph->end(); ++step)
+     {
+       const Tstep* tstep = dynamic_cast<const Tstep *>(*step);
+       if (!tstep) continue; // not a translation step
+       const PhraseDictionary &pdict = *tstep->GetPhraseDictionaryFeature();
+       pdict.GetTargetPhraseCollectionBatch(m_inputPathQueue);
+     }
}
- }
}
-}
-
+
} // namespace
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index 2db0df34a..562912b18 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -1,3 +1,4 @@
+// -*- c++ -*-
// $Id$
/***********************************************************************
@@ -88,9 +89,17 @@ protected:
//! sort all trans opt in each list for cube pruning */
void Sort();
+public:
+ // is there any good reason not to make these public? UG
+
//! list of trans opt for a particular span
- TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos);
- const TranslationOptionList &GetTranslationOptionList(size_t startPos, size_t endPos) const;
+ TranslationOptionList*
+ GetTranslationOptionList(size_t startPos, size_t endPos);
+
+ TranslationOptionList const*
+ GetTranslationOptionList(size_t startPos, size_t endPos) const;
+
+protected:
void Add(TranslationOption *translationOption);
//! implemented by inherited class, called by this class
@@ -104,7 +113,7 @@ protected:
void GetTargetPhraseCollectionBatch();
- void CreateTranslationOptionsForRange(
+ bool CreateTranslationOptionsForRange(
const DecodeGraph &decodeGraph
, size_t startPos
, size_t endPos
@@ -129,15 +138,20 @@ public:
//! Create all possible translations from the phrase tables
virtual void CreateTranslationOptions();
- //! Create translation options that exactly cover a specific input span.
- virtual void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
- , size_t startPosition
- , size_t endPosition
- , bool adhereTableLimit
- , size_t graphInd) = 0;
+ //! Create translation options that exactly cover a specific input span.
+ virtual
+ bool
+ CreateTranslationOptionsForRange
+ (const DecodeGraph &decodeStepList,
+ size_t startPosition, size_t endPosition,
+ bool adhereTableLimit, size_t graphInd) = 0;
+
//!Check if this range has XML options
- virtual bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;
+ virtual
+ bool
+ HasXmlOptionsOverlappingRange(size_t startPosition,
+ size_t endPosition) const;
//! Check if a subsumed XML option constraint is satisfied
virtual bool ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const;
@@ -152,7 +166,9 @@ public:
}
//! list of trans opt for a particular span
- const TranslationOptionList &GetTranslationOptionList(const WordsRange &coverage) const {
+ TranslationOptionList const*
+ GetTranslationOptionList(const WordsRange &coverage) const
+ {
return GetTranslationOptionList(coverage.GetStartPos(), coverage.GetEndPos());
}
diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp
index e03f074b0..698cf51c2 100644
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@@ -177,52 +177,51 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptions()
* \param startPos first position in input sentence
* \param lastPos last position in input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
+ * \return true if at least one input path for the range has matches
+ * on the source side of the parallel data, i.e., the phrase prefix exists
+ * (a false return is the abort condition for trie-based lookup)
*/
-void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRange(
- const DecodeGraph &decodeGraph
- , size_t startPos
- , size_t endPos
- , bool adhereTableLimit
- , size_t graphInd)
+bool
+TranslationOptionCollectionConfusionNet::
+CreateTranslationOptionsForRange(const DecodeGraph &decodeGraph,
+ size_t startPos, size_t endPos,
+ bool adhereTableLimit, size_t graphInd)
{
if (StaticData::Instance().GetUseLegacyPT()) {
- CreateTranslationOptionsForRangeLEGACY(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
+ return CreateTranslationOptionsForRangeLEGACY(decodeGraph, startPos, endPos,
+ adhereTableLimit, graphInd);
} else {
- CreateTranslationOptionsForRangeNew(decodeGraph, startPos, endPos, adhereTableLimit, graphInd);
+ return CreateTranslationOptionsForRangeNew(decodeGraph, startPos, endPos,
+ adhereTableLimit, graphInd);
}
}
-void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeNew(
- const DecodeGraph &decodeGraph
- , size_t startPos
- , size_t endPos
- , bool adhereTableLimit
- , size_t graphInd)
+bool
+TranslationOptionCollectionConfusionNet::
+CreateTranslationOptionsForRangeNew(const DecodeGraph &decodeGraph, size_t startPos,
+ size_t endPos, bool adhereTableLimit, size_t graphInd)
{
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
+ if (inputPathList.size() == 0) return false; // no input path matches!
InputPathList::iterator iter;
for (iter = inputPathList.begin(); iter != inputPathList.end(); ++iter) {
InputPath &inputPath = **iter;
- TranslationOptionCollection::CreateTranslationOptionsForRange(decodeGraph
- , startPos
- , endPos
- , adhereTableLimit
- , graphInd
- , inputPath);
-
+ TranslationOptionCollection::CreateTranslationOptionsForRange
+ (decodeGraph, startPos, endPos, adhereTableLimit, graphInd, inputPath);
}
+ return true;
}
-void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLEGACY(
- const DecodeGraph &decodeGraph
- , size_t startPos
- , size_t endPos
- , bool adhereTableLimit
- , size_t graphInd)
+bool
+TranslationOptionCollectionConfusionNet::
+CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t startPos,
+ size_t endPos, bool adhereTableLimit, size_t graphInd)
{
- if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
+ bool retval = true;
+ XmlInputType intype = StaticData::Instance().GetXmlInputType();
+ if ((intype != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
-
+
// partial trans opt stored in here
PartialTranslOptColl* oldPtoc = new PartialTranslOptColl;
size_t totalEarlyPruned = 0;
@@ -232,8 +231,7 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLE
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslationLEGACY
- (m_source, *oldPtoc
- , startPos, endPos, adhereTableLimit, inputPathList );
+ (m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList);
// do rest of decode steps
int indexStep = 0;
@@ -292,11 +290,14 @@ void TranslationOptionCollectionConfusionNet::CreateTranslationOptionsForRangeLE
delete oldPtoc;
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
- } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
-
- if (graphInd == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough && HasXmlOptionsOverlappingRange(startPos,endPos)) {
+ } // if ((intype != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos))
+
+
+ if (graphInd == 0 && intype != XmlPassThrough &&
+ HasXmlOptionsOverlappingRange(startPos,endPos)) {
CreateXmlOptionsForRange(startPos, endPos);
}
+ return retval;
}
diff --git a/moses/TranslationOptionCollectionConfusionNet.h b/moses/TranslationOptionCollectionConfusionNet.h
index ee53f35eb..cf01ebdb1 100644
--- a/moses/TranslationOptionCollectionConfusionNet.h
+++ b/moses/TranslationOptionCollectionConfusionNet.h
@@ -22,13 +22,13 @@ protected:
InputPathMatrix m_inputPathMatrix; /*< contains translation options */
InputPathList &GetInputPathList(size_t startPos, size_t endPos);
- void CreateTranslationOptionsForRangeNew(const DecodeGraph &decodeStepList
+ bool CreateTranslationOptionsForRangeNew(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
, size_t graphInd);
- void CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeStepList
+ bool CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
@@ -39,12 +39,12 @@ public:
void ProcessUnknownWord(size_t sourcePos);
void CreateTranslationOptions();
- void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
- , size_t startPosition
- , size_t endPosition
- , bool adhereTableLimit
- , size_t graphInd);
+ bool
+ CreateTranslationOptionsForRange
+ (const DecodeGraph &decodeStepList, size_t spos, size_t epos,
+ bool adhereTableLimit, size_t graphInd);
+
protected:
};
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index d20e07fbf..6f9de7836 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -170,18 +170,22 @@ void TranslationOptionCollectionLattice::CreateTranslationOptions()
}
-void TranslationOptionCollectionLattice::ProcessUnknownWord(size_t sourcePos)
+void
+TranslationOptionCollectionLattice::
+ProcessUnknownWord(size_t sourcePos)
{
UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
+ // why??? UG
}
-void TranslationOptionCollectionLattice::CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
- , size_t startPosition
- , size_t endPosition
- , bool adhereTableLimit
- , size_t graphInd)
+bool
+TranslationOptionCollectionLattice::
+CreateTranslationOptionsForRange
+(const DecodeGraph &decodeStepList, size_t startPosition, size_t endPosition,
+ bool adhereTableLimit, size_t graphInd)
{
- UTIL_THROW(util::Exception, "CreateTranslationOptionsForRange() not implemented for lattice");
+ UTIL_THROW(util::Exception,
+ "CreateTranslationOptionsForRange() not implemented for lattice");
}
} // namespace
diff --git a/moses/TranslationOptionCollectionLattice.h b/moses/TranslationOptionCollectionLattice.h
index 09efb4b3c..cea90f11e 100644
--- a/moses/TranslationOptionCollectionLattice.h
+++ b/moses/TranslationOptionCollectionLattice.h
@@ -26,11 +26,10 @@ public:
void CreateTranslationOptions();
- void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
- , size_t startPosition
- , size_t endPosition
- , bool adhereTableLimit
- , size_t graphInd); // do not implement
+ bool
+ CreateTranslationOptionsForRange
+ (const DecodeGraph &decodeStepList, size_t startPosition, size_t endPosition,
+ bool adhereTableLimit, size_t graphInd); // do not implement
protected:
void Extend(const InputPath &prevPath, const WordLattice &input);
diff --git a/moses/TranslationOptionCollectionText.cpp b/moses/TranslationOptionCollectionText.cpp
index 2db62fc8f..0f7671a70 100644
--- a/moses/TranslationOptionCollectionText.cpp
+++ b/moses/TranslationOptionCollectionText.cpp
@@ -171,21 +171,18 @@ void TranslationOptionCollectionText::CreateTranslationOptions()
* \param lastPos last position in input sentence
* \param adhereTableLimit whether phrase & generation table limits are adhered to
*/
-void TranslationOptionCollectionText::CreateTranslationOptionsForRange(
- const DecodeGraph &decodeGraph
- , size_t startPos
- , size_t endPos
- , bool adhereTableLimit
- , size_t graphInd)
+bool
+TranslationOptionCollectionText::
+CreateTranslationOptionsForRange
+(const DecodeGraph &decodeGraph, size_t startPos, size_t endPos,
+ bool adhereTableLimit, size_t graphInd)
{
InputPath &inputPath = GetInputPath(startPos, endPos);
-
- TranslationOptionCollection::CreateTranslationOptionsForRange(decodeGraph
- , startPos
- , endPos
- , adhereTableLimit
- , graphInd
- , inputPath);
+
+ return
+ TranslationOptionCollection::
+ CreateTranslationOptionsForRange
+ (decodeGraph, startPos, endPos, adhereTableLimit, graphInd, inputPath);
}
diff --git a/moses/TranslationOptionCollectionText.h b/moses/TranslationOptionCollectionText.h
index 6ba5598ef..cdb35963e 100644
--- a/moses/TranslationOptionCollectionText.h
+++ b/moses/TranslationOptionCollectionText.h
@@ -56,7 +56,7 @@ public:
void CreateTranslationOptions();
- void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
+ bool CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
, size_t startPosition
, size_t endPosition
, bool adhereTableLimit
diff --git a/moses/TranslationOptionList.cpp b/moses/TranslationOptionList.cpp
index 1d99729fe..4e6449b5b 100644
--- a/moses/TranslationOptionList.cpp
+++ b/moses/TranslationOptionList.cpp
@@ -1,39 +1,90 @@
-
#include "TranslationOptionList.h"
#include "Util.h"
#include "TranslationOption.h"
+#include <boost/foreach.hpp>
using namespace std;
namespace Moses
{
-TranslationOptionList::TranslationOptionList(const TranslationOptionList &copy)
-{
- const_iterator iter;
- for (iter = copy.begin(); iter != copy.end(); ++iter) {
- const TranslationOption &origTransOpt = **iter;
- TranslationOption *newTransOpt = new TranslationOption(origTransOpt);
- Add(newTransOpt);
+ //! Deep copy: every TranslationOption of the source list is cloned with
+ //! `new` and the clone is added to this list.
+ TranslationOptionList::
+ TranslationOptionList(const TranslationOptionList &copy)
+ {
+   BOOST_FOREACH(TranslationOption const* original, copy)
+     Add(new TranslationOption(*original));
}
-}
-TranslationOptionList::~TranslationOptionList()
-{
- RemoveAllInColl(m_coll);
-}
+ TranslationOptionList::
+ ~TranslationOptionList()
+ {
+ // Hands m_coll to the RemoveAllInColl helper.
+ // NOTE(review): presumably this deletes every stored TranslationOption
+ // and empties the vector — confirm against the helper's definition.
+ RemoveAllInColl(m_coll);
+ }
-TO_STRING_BODY(TranslationOptionList);
+ TO_STRING_BODY(TranslationOptionList);
-std::ostream& operator<<(std::ostream& out, const TranslationOptionList& coll)
-{
- TranslationOptionList::const_iterator iter;
- for (iter = coll.begin(); iter != coll.end(); ++iter) {
- const TranslationOption &transOpt = **iter;
- out << transOpt << endl;
+ //! Print every translation option of the list on its own line.
+ std::ostream& operator<<(std::ostream& out, const TranslationOptionList& coll)
+ {
+   BOOST_FOREACH(TranslationOption const* to, coll)
+     out << *to << endl;
+   return out;
+ }
+
+ /** Keep only the N options that rank first under TranslationOption::Better
+  *  and delete all others. The list is left untouched when N is 0 or when it
+  *  holds at most N options.
+  *  \param N number of options to keep
+  *  \return number of options that were deleted
+  */
+ size_t
+ TranslationOptionList::
+ SelectNBest(size_t const N)
+ {
+   size_t const total = m_coll.size();
+   if (N == 0 || N >= total) return 0;
+
+   static TranslationOption::Better cmp;
+   CollType::iterator const cut = m_coll.begin() + N;
+   // partition only: [begin, cut) receives the N first-ranked options, unsorted
+   NTH_ELEMENT4(m_coll.begin(), cut, m_coll.end(), cmp);
+
+   // free everything behind the cut, then drop the dangling pointers
+   for (CollType::iterator victim = cut; victim != m_coll.end(); ++victim)
+     delete *victim;
+   m_coll.erase(cut, m_coll.end());
+   return total - N;
+ }
+
+ /** Delete every option whose future score is below (best future score + th).
+  *  Lists with at most one option, and a threshold of -infinity, are left
+  *  untouched. The order of the surviving options is not preserved.
+  *  NOTE(review): with a non-positive th the cutoff lies at or below the best
+  *  score, so the best option always survives; a positive th would put the
+  *  cutoff above every score and empty the list. Presumably callers pass a
+  *  non-positive (log-scale) margin — confirm.
+  *  \param th margin added to the best future score to obtain the cutoff
+  *  \return number of options that were deleted
+  */
+ size_t
+ TranslationOptionList::
+ PruneByThreshold(float const th)
+ {
+   if (m_coll.size() <= 1) return 0;
+   if (th == -std::numeric_limits<float>::infinity()) return 0;
+
+   // first, find the best score
+   float bestScore = -std::numeric_limits<float>::infinity();
+   BOOST_FOREACH(TranslationOption const* t, m_coll)
+   {
+     if (t->GetFutureScore() > bestScore)
+       bestScore = t->GetFutureScore();
+   }
+
+   size_t const old_size = m_coll.size();
+   float const cutoff = bestScore + th;
+
+   // then, remove items that are worse than the cutoff
+   size_t i = 0;
+   while (i < m_coll.size())
+   {
+     if (m_coll[i]->GetFutureScore() < cutoff)
+     {
+       delete m_coll[i];
+       // fill the hole with the last element and shrink; do NOT advance i,
+       // because the element just moved into slot i has not been tested yet
+       m_coll[i] = m_coll.back();
+       m_coll.pop_back();
+     }
+     else
+     {
+       ++i;
+     }
+   }
+
+   return old_size - m_coll.size();
}
- return out;
-}
} // namespace
diff --git a/moses/TranslationOptionList.h b/moses/TranslationOptionList.h
index 39ab526f9..c12d108af 100644
--- a/moses/TranslationOptionList.h
+++ b/moses/TranslationOptionList.h
@@ -1,5 +1,5 @@
-#ifndef moses_TranslationOptionList_h
-#define moses_TranslationOptionList_h
+// -*- c++ -*-
+#pragma once
#include <vector>
#include "util/exception.hh"
@@ -9,62 +9,63 @@
namespace Moses
{
-class TranslationOption;
+ class TranslationOption;
-/** wrapper around vector of translation options
- */
-class TranslationOptionList
-{
- friend std::ostream& operator<<(std::ostream& out, const TranslationOptionList& coll);
-
-protected:
- typedef std::vector<TranslationOption*> CollType;
- CollType m_coll;
-
-public:
- typedef CollType::iterator iterator;
- typedef CollType::const_iterator const_iterator;
- const_iterator begin() const {
- return m_coll.begin();
- }
- const_iterator end() const {
- return m_coll.end();
- }
- iterator begin() {
- return m_coll.begin();
- }
- iterator end() {
- return m_coll.end();
- }
-
- TranslationOptionList() {
- }
- TranslationOptionList(const TranslationOptionList &copy);
- ~TranslationOptionList();
+ /** Thin wrapper around a std::vector of TranslationOption pointers that
+ *  adds checked access plus pruning (SelectNBest, PruneByThreshold). */
+ class TranslationOptionList
+ {
+ friend std::ostream& operator<<(std::ostream& out, const TranslationOptionList& coll);
- void resize(size_t newSize) {
- m_coll.resize(newSize);
- }
- size_t size() const {
- return m_coll.size();
- }
+ protected:
+ typedef std::vector<TranslationOption*> CollType;
+ CollType m_coll; // raw pointers; SelectNBest/PruneByThreshold delete the ones they drop
+
+ public:
+ typedef CollType::iterator iterator;
+ typedef CollType::const_iterator const_iterator;
+ const_iterator begin() const {
+ return m_coll.begin();
+ }
+ const_iterator end() const {
+ return m_coll.end();
+ }
+ iterator begin() {
+ return m_coll.begin();
+ }
+ iterator end() {
+ return m_coll.end();
+ }
+
+ TranslationOptionList() {
+ }
+ TranslationOptionList(const TranslationOptionList &copy);
+ ~TranslationOptionList();
+
+ void resize(size_t newSize) { // plain vector resize; shrinking does not delete the pointees
+ m_coll.resize(newSize);
+ }
+ size_t size() const {
+ return m_coll.size();
+ }
+
+ const TranslationOption *Get(size_t ind) const { // bounds-checked through vector::at
+ return m_coll.at(ind);
+ }
+ void Remove( size_t ind ) { // erases the slot only; the pointee is not deleted here
+ UTIL_THROW_IF2(ind >= m_coll.size(),
+ "Out of bound index " << ind);
+ m_coll.erase( m_coll.begin()+ind );
+ }
+ void Add(TranslationOption *transOpt) { // a null pointer is rejected via UTIL_THROW_IF2
+ UTIL_THROW_IF2(!transOpt, "Not a valid translation option!");
+ m_coll.push_back(transOpt);
+ }
+
+ TO_STRING();
- const TranslationOption *Get(size_t ind) const {
- return m_coll.at(ind);
- }
- void Remove( size_t ind ) {
- UTIL_THROW_IF2(ind >= m_coll.size(),
- "Out of bound index " << ind);
- m_coll.erase( m_coll.begin()+ind );
- }
- void Add(TranslationOption *transOpt) {
- m_coll.push_back(transOpt);
- }
-
- TO_STRING();
-
-};
+ size_t SelectNBest(size_t const N); // keeps N options, deletes the rest; returns how many were deleted
+ size_t PruneByThreshold(float const th); // deletes options scoring below best + th; returns how many
+ };
}
-
-#endif
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 7c629db7f..eff0588b6 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -61,7 +61,7 @@ void TranslationTask::Run()
// which manager
BaseManager *manager;
- if (!staticData.IsChart()) {
+ if (!staticData.IsSyntax()) {
// phrase-based
manager = new Manager(*m_source);
} else if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index a56b3fbac..a619639bc 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -140,6 +140,9 @@ enum DictionaryFind {
,All = 1
};
+// Note: StaticData uses SearchAlgorithm to determine whether the translation
+// model is phrase-based or syntax-based. If you add a syntax-based search
+// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
diff --git a/moses/Util.h b/moses/Util.h
index beefa53da..48e6a51ae 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -48,6 +48,11 @@ namespace Moses
* when compiling for a gui front-end so that running gui won't generate
* output on command line
* */
+
+// TRACE_ERR might have been defined by IRSTLM
+#ifdef TRACE_ERR
+#undef TRACE_ERR
+#endif
#ifdef TRACE_ENABLE
#define TRACE_ERR(str) do { std::cerr << str; } while (false)
#else
@@ -57,7 +62,16 @@ namespace Moses
/** verbose macros
* */
+// VERBOSE might have been defined by IRSTLM
+#ifdef VERBOSE
+#undef VERBOSE
+#endif
#define VERBOSE(level,str) { IFVERBOSE(level) { TRACE_ERR(str); } }
+
+// IFVERBOSE might have been defined by IRSTLM
+#ifdef IFVERBOSE
+#undef IFVERBOSE
+#endif
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
#define XVERBOSE(level,str) VERBOSE(level, "[" << HERE << "] " << str)
#define HERE __FILE__ << ":" << __LINE__