Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author Michael Denkowski <mdenkows@amazon.com> 2015-09-25 12:03:23 +0300
committer Michael Denkowski <mdenkows@amazon.com> 2015-09-25 12:03:23 +0300
commit 56e3bc1ea209540bc0ade9797e6dfba578738e1d (patch)
tree a3594bc0cd9783e3d109f374a9e20b323e736a5e /moses
parent d2a6aa752e91ce62bd746a704b1a7e7534fdbb59 (diff)
Reintroduce deterministic cube pruning as option
Use --cube-pruning-deterministic-search or --cbds
Diffstat (limited to 'moses')
-rw-r--r-- moses/BitmapContainer.cpp | 9
-rw-r--r-- moses/BitmapContainer.h | 50
-rw-r--r-- moses/HypothesisStackCubePruning.cpp | 5
-rw-r--r-- moses/HypothesisStackCubePruning.h | 1
-rw-r--r-- moses/Parameter.cpp | 1
-rw-r--r-- moses/SearchCubePruning.cpp | 9
-rw-r--r-- moses/parameters/CubePruningOptions.cpp | 42
-rw-r--r-- moses/parameters/CubePruningOptions.h | 1
8 files changed, 79 insertions, 39 deletions
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index 40ec74153..ae7c03990 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -273,9 +273,11 @@ BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
////////////////////////////////////////////////////////////////////////////////
BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
- , HypothesisStackCubePruning &stack)
+ , HypothesisStackCubePruning &stack
+ , bool deterministic)
: m_bitmap(bitmap)
, m_stack(stack)
+ , m_deterministic(deterministic)
, m_numStackInsertions(0)
{
m_hypotheses = HypothesisSet();
@@ -309,10 +311,13 @@ BitmapContainer::Enqueue(int hypothesis_pos
, Hypothesis *hypothesis
, BackwardsEdge *edge)
{
+ // Only supply target phrase if running deterministic search mode
+ const TargetPhrase *target_phrase = m_deterministic ? &(hypothesis->GetCurrTargetPhrase()) : NULL;
HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
, translation_pos
, hypothesis
- , edge);
+ , edge
+ , target_phrase);
IFVERBOSE(2) {
item->GetHypothesis()->GetManager().GetSentenceStats().StartTimeManageCubes();
}
diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h
index 500059081..88bc79efe 100644
--- a/moses/BitmapContainer.h
+++ b/moses/BitmapContainer.h
@@ -61,6 +61,7 @@ private:
size_t m_hypothesis_pos, m_translation_pos;
Hypothesis *m_hypothesis;
BackwardsEdge *m_edge;
+ boost::shared_ptr<TargetPhrase> m_target_phrase;
HypothesisQueueItem();
@@ -68,11 +69,16 @@ public:
HypothesisQueueItem(const size_t hypothesis_pos
, const size_t translation_pos
, Hypothesis *hypothesis
- , BackwardsEdge *edge)
+ , BackwardsEdge *edge
+ , const TargetPhrase *target_phrase = NULL)
: m_hypothesis_pos(hypothesis_pos)
, m_translation_pos(translation_pos)
, m_hypothesis(hypothesis)
, m_edge(edge) {
+ if (target_phrase != NULL)
+ {
+ m_target_phrase.reset(new TargetPhrase(*target_phrase));
+ }
}
~HypothesisQueueItem() {
@@ -93,6 +99,10 @@ public:
BackwardsEdge *GetBackwardsEdge() {
return m_edge;
}
+
+ boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
+ return m_target_phrase;
+ }
};
//! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
@@ -103,20 +113,26 @@ public:
float scoreA = itemA->GetHypothesis()->GetTotalScore();
float scoreB = itemB->GetHypothesis()->GetTotalScore();
- return (scoreA < scoreB);
-
- /*
+ if (scoreA < scoreB)
{
- return true;
+ return true;
}
- else if (scoreA < scoreB)
+ else if (scoreA > scoreB)
{
- return false;
+ return false;
}
else
{
- return itemA < itemB;
- }*/
+ // Equal scores: break ties by comparing target phrases (if they exist)
+ boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
+ boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
+ if (!phrA || !phrB)
+ {
+ // Fallback: scoreA < scoreB == false, non-deterministic sort
+ return false;
+ }
+ return (phrA->Compare(*phrB) < 0);
+ }
}
};
@@ -134,18 +150,6 @@ public:
float scoreB = hypoB->GetTotalScore();
return (scoreA > scoreB);
- /*
- {
- return true;
- }
- else if (scoreA < scoreB)
- {
- return false;
- }
- else
- {
- return hypoA < hypoB;
- }*/
}
};
@@ -210,13 +214,15 @@ private:
BackwardsEdgeSet m_edges;
HypothesisQueue m_queue;
size_t m_numStackInsertions;
+ bool m_deterministic;
// We always require a corresponding bitmap to be supplied.
BitmapContainer();
BitmapContainer(const BitmapContainer &);
public:
BitmapContainer(const WordsBitmap &bitmap
- , HypothesisStackCubePruning &stack);
+ , HypothesisStackCubePruning &stack
+ , bool deterministic_sort = false);
// The destructor will also delete all the edges that are
// connected to this BitmapContainer.
diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp
index 23fc2b01a..ff8b0dafb 100644
--- a/moses/HypothesisStackCubePruning.cpp
+++ b/moses/HypothesisStackCubePruning.cpp
@@ -39,6 +39,7 @@ HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
+ m_deterministic = manager.options().cube.deterministic_search;
}
/** remove all hypotheses from the collection */
@@ -148,7 +149,7 @@ void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
"Should have added hypothesis " << *hypo);
const WordsBitmap &bitmap = hypo->GetWordsBitmap();
- m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this);
+ m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this, m_deterministic);
}
void HypothesisStackCubePruning::PruneToSize(size_t newSize)
@@ -258,7 +259,7 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
BitmapContainer *bmContainer;
if (bcExists == m_bitmapAccessor.end()) {
- bmContainer = new BitmapContainer(newBitmap, stack);
+ bmContainer = new BitmapContainer(newBitmap, stack, m_deterministic);
m_bitmapAccessor[newBitmap] = bmContainer;
} else {
bmContainer = bcExists->second;
diff --git a/moses/HypothesisStackCubePruning.h b/moses/HypothesisStackCubePruning.h
index 6dc973ed3..c8b0d6bef 100644
--- a/moses/HypothesisStackCubePruning.h
+++ b/moses/HypothesisStackCubePruning.h
@@ -52,6 +52,7 @@ protected:
float m_beamWidth; /**< minimum score due to threashold pruning */
size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
+ bool m_deterministic; /**< flag to determine whether to sort hypotheses deterministically */
/** add hypothesis to stack. Prune if necessary.
* Returns false if equiv hypo exists in collection, otherwise returns true
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index d0b6d6374..564b2e506 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -116,6 +116,7 @@ Parameter::Parameter()
AddParam(cube_opts,"cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
AddParam(cube_opts,"cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
AddParam(cube_opts,"cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
+ AddParam(cube_opts,"cube-pruning-deterministic-search", "cbds", "Break ties deterministically during search");
///////////////////////////////////////////////////////////////////////////////////////
// minimum bayes risk decoding
diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp
index 7219a40f7..bbbfde197 100644
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -32,7 +32,14 @@ public:
} else if (scoreA > scoreB) {
return false;
} else {
- return A < B;
+ // Equal scores: break ties by comparing target phrases (if they exist)
+ boost::shared_ptr<TargetPhrase> phrA = A->Top()->GetTargetPhrase();
+ boost::shared_ptr<TargetPhrase> phrB = B->Top()->GetTargetPhrase();
+ if (!phrA || !phrB) {
+ // Fallback: compare pointers, non-deterministic sort
+ return A < B;
+ }
+ return (phrA->Compare(*phrB) < 0);
}
}
};
diff --git a/moses/parameters/CubePruningOptions.cpp b/moses/parameters/CubePruningOptions.cpp
index a8710c681..b4ebb0d5e 100644
--- a/moses/parameters/CubePruningOptions.cpp
+++ b/moses/parameters/CubePruningOptions.cpp
@@ -13,6 +13,7 @@ namespace Moses
param.SetParameter(diversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false);
+ param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false);
return true;
}
@@ -30,20 +31,37 @@ namespace Moses
if (si != params.end()) diversity = xmlrpc_c::value_int(si->second);
si = params.find("cube-pruning-lazy-scoring");
- if (si != params.end())
- {
- std::string spec = xmlrpc_c::value_string(si->second);
- if (spec == "true" or spec == "on" or spec == "1")
- lazy_scoring = true;
- else if (spec == "false" or spec == "off" or spec == "0")
- lazy_scoring = false;
- else
+ if (si != params.end())
{
- char const* msg
- = "Error parsing specification for cube-pruning-lazy-scoring";
- xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+ std::string spec = xmlrpc_c::value_string(si->second);
+ if (spec == "true" or spec == "on" or spec == "1")
+ lazy_scoring = true;
+ else if (spec == "false" or spec == "off" or spec == "0")
+ lazy_scoring = false;
+ else
+ {
+ char const* msg
+ = "Error parsing specification for cube-pruning-lazy-scoring";
+ xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+ }
}
- }
+
+ si = params.find("cube-pruning-deterministic-search");
+ if (si != params.end())
+ {
+ std::string spec = xmlrpc_c::value_string(si->second);
+ if (spec == "true" or spec == "on" or spec == "1")
+ deterministic_search = true;
+ else if (spec == "false" or spec == "off" or spec == "0")
+ deterministic_search = false;
+ else
+ {
+ char const* msg
+ = "Error parsing specification for cube-pruning-deterministic-search";
+ xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
+ }
+ }
+
return true;
}
#endif
diff --git a/moses/parameters/CubePruningOptions.h b/moses/parameters/CubePruningOptions.h
index 5d27be6a9..961b1a479 100644
--- a/moses/parameters/CubePruningOptions.h
+++ b/moses/parameters/CubePruningOptions.h
@@ -12,6 +12,7 @@ namespace Moses
size_t pop_limit;
size_t diversity;
bool lazy_scoring;
+ bool deterministic_search;
bool init(Parameter const& param);
CubePruningOptions(Parameter const& param);