Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Denkowski <mdenkows@amazon.com>, 2015-10-26 18:24:53 +0300
committer: Michael Denkowski <mdenkows@amazon.com>, 2015-10-26 19:42:42 +0300
commit: d3f3389f20bb49830b3837370ca96543e4093660 (patch)
tree: f75bac6d6952b756a115e34cde051a17f8fc4473
parent: 6a37dfd2ce279e8493e151b505215eb9f21865f9 (diff)
More deterministic tie-breaking for cube pruning (--cbds)
Doesn't slow down regular non-deterministic cube pruning
-rw-r--r-- moses/BitmapContainer.cpp | 22
-rw-r--r-- moses/BitmapContainer.h | 34
-rw-r--r-- moses/HypothesisStackCubePruning.cpp | 5
-rw-r--r-- moses/SearchCubePruning.cpp | 5
4 files changed, 55 insertions, 11 deletions
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index 54207ba07..7adadee82 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -51,9 +51,14 @@ public:
class HypothesisScoreOrdererWithDistortion
{
+private:
+ bool m_deterministic;
+
public:
- HypothesisScoreOrdererWithDistortion(const Range* transOptRange) :
- m_transOptRange(transOptRange) {
+ HypothesisScoreOrdererWithDistortion(const Range* transOptRange,
+ const bool deterministic = false)
+ : m_transOptRange(transOptRange)
+ , m_deterministic(deterministic) {
m_totalWeightDistortion = 0;
const StaticData &staticData = StaticData::Instance();
@@ -97,6 +102,11 @@ public:
} else if (scoreA < scoreB) {
return false;
} else {
+ if (m_deterministic) {
+ // Equal scores: break ties by comparing target phrases
+ return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
+ }
+ // Fallback: non-deterministic sort
return hypoA < hypoB;
}
}
@@ -111,13 +121,15 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScores,
- const InputType& itype)
+ const InputType& itype,
+ const bool deterministic)
: m_initialized(false)
, m_prevBitmapContainer(prevBitmapContainer)
, m_parent(parent)
, m_translations(translations)
, m_futureScores(futureScores)
, m_seenPosition()
+ , m_deterministic(deterministic)
{
// If either dimension is empty, we haven't got anything to do.
@@ -174,7 +186,7 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
<< m_hypotheses[1]->GetTotalScore());
}
- HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
+ HypothesisScoreOrdererWithDistortion orderer (&transOptRange, m_deterministic);
std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
@@ -478,7 +490,7 @@ BitmapContainer::ProcessBestHypothesis()
void
BitmapContainer::SortHypotheses()
{
- std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer());
+ std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer(m_deterministic));
}
}
diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h
index 2840e62d9..5f301a1e8 100644
--- a/moses/BitmapContainer.h
+++ b/moses/BitmapContainer.h
@@ -118,6 +118,11 @@ public:
return false;
} else {
// Equal scores: break ties by comparing target phrases (if they exist)
+ // *Important*: these are pointers to copies of the target phrases from the
+ // hypotheses. This class is used to keep priority queues ordered in the
+ // background, so comparisons made as those data structures are cleaned up
+ // may occur *after* the target phrases in hypotheses have been cleaned up,
+ // leading to segfaults if relying on hypotheses to provide target phrases.
boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
if (!phrA || !phrB) {
@@ -137,12 +142,30 @@ public:
class HypothesisScoreOrderer
{
+private:
+ bool m_deterministic;
+
public:
+ HypothesisScoreOrderer(const bool deterministic = false)
+ : m_deterministic(deterministic) {}
+
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
+
float scoreA = hypoA->GetTotalScore();
float scoreB = hypoB->GetTotalScore();
- return (scoreA > scoreB);
+ if (scoreA > scoreB) {
+ return true;
+ } else if (scoreA < scoreB) {
+ return false;
+ } else {
+ if (m_deterministic) {
+ // Equal scores: break ties by comparing target phrases
+ return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
+ }
+ // Fallback: scoreA > scoreB == false, non-deterministic sort
+ return false;
+ }
}
};
@@ -164,6 +187,8 @@ private:
const SquareMatrix &m_futureScores;
float m_futureScore;
+ bool m_deterministic;
+
std::vector< const Hypothesis* > m_hypotheses;
boost::unordered_set< int > m_seenPosition;
@@ -181,8 +206,9 @@ public:
BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
- , const SquareMatrix &futureScores,
- const InputType& source);
+ , const SquareMatrix &futureScores
+ , const InputType& source
+ , const bool deterministic = false);
~BackwardsEdge();
bool GetInitialized();
@@ -216,7 +242,7 @@ private:
public:
BitmapContainer(const Bitmap &bitmap
, HypothesisStackCubePruning &stack
- , bool deterministic_sort = false);
+ , bool deterministic = false);
// The destructor will also delete all the edges that are
// connected to this BitmapContainer.
diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp
index 32dd99d31..a460a3f0b 100644
--- a/moses/HypothesisStackCubePruning.cpp
+++ b/moses/HypothesisStackCubePruning.cpp
@@ -259,8 +259,9 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const Bitmap &newBitmap
BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
, *bmContainer
, transOptList
- , futureScore,
- m_manager.GetSource());
+ , futureScore
+ , m_manager.GetSource()
+ , m_deterministic);
bmContainer->AddBackwardsEdge(edge);
}
diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp
index 4be9914b9..920f264f9 100644
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -33,6 +33,11 @@ public:
return false;
} else {
// Equal scores: break ties by comparing target phrases (if they exist)
+ // *Important*: these are pointers to copies of the target phrases from the
+ // hypotheses. This class is used to keep priority queues ordered in the
+ // background, so comparisons made as those data structures are cleaned up
+ // may occur *after* the target phrases in hypotheses have been cleaned up,
+ // leading to segfaults if relying on hypotheses to provide target phrases.
boost::shared_ptr<TargetPhrase> phrA = A->Top()->GetTargetPhrase();
boost::shared_ptr<TargetPhrase> phrB = B->Top()->GetTargetPhrase();
if (!phrA || !phrB) {