diff options
author | Michael Denkowski <mdenkows@amazon.com> | 2015-09-25 12:03:23 +0300 |
---|---|---|
committer | Michael Denkowski <mdenkows@amazon.com> | 2015-09-25 12:03:23 +0300 |
commit | 56e3bc1ea209540bc0ade9797e6dfba578738e1d (patch) | |
tree | a3594bc0cd9783e3d109f374a9e20b323e736a5e /moses | |
parent | d2a6aa752e91ce62bd746a704b1a7e7534fdbb59 (diff) |
Reintroduce deterministic cube pruning as an option
Enable it with --cube-pruning-deterministic-search (short form: --cbds)
Diffstat (limited to 'moses')
-rw-r--r-- | moses/BitmapContainer.cpp | 9 | ||||
-rw-r--r-- | moses/BitmapContainer.h | 50 | ||||
-rw-r--r-- | moses/HypothesisStackCubePruning.cpp | 5 | ||||
-rw-r--r-- | moses/HypothesisStackCubePruning.h | 1 | ||||
-rw-r--r-- | moses/Parameter.cpp | 1 | ||||
-rw-r--r-- | moses/SearchCubePruning.cpp | 9 | ||||
-rw-r--r-- | moses/parameters/CubePruningOptions.cpp | 42 | ||||
-rw-r--r-- | moses/parameters/CubePruningOptions.h | 1 |
8 files changed, 79 insertions, 39 deletions
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index 40ec74153..ae7c03990 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -273,9 +273,11 @@ BackwardsEdge::PushSuccessors(const size_t x, const size_t y) //////////////////////////////////////////////////////////////////////////////// BitmapContainer::BitmapContainer(const WordsBitmap &bitmap - , HypothesisStackCubePruning &stack) + , HypothesisStackCubePruning &stack + , bool deterministic) : m_bitmap(bitmap) , m_stack(stack) + , m_deterministic(deterministic) , m_numStackInsertions(0) { m_hypotheses = HypothesisSet(); @@ -309,10 +311,13 @@ BitmapContainer::Enqueue(int hypothesis_pos , Hypothesis *hypothesis , BackwardsEdge *edge) { + // Only supply target phrase if running deterministic search mode + const TargetPhrase *target_phrase = m_deterministic ? &(hypothesis->GetCurrTargetPhrase()) : NULL; HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos , translation_pos , hypothesis - , edge); + , edge + , target_phrase); IFVERBOSE(2) { item->GetHypothesis()->GetManager().GetSentenceStats().StartTimeManageCubes(); } diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h index 500059081..88bc79efe 100644 --- a/moses/BitmapContainer.h +++ b/moses/BitmapContainer.h @@ -61,6 +61,7 @@ private: size_t m_hypothesis_pos, m_translation_pos; Hypothesis *m_hypothesis; BackwardsEdge *m_edge; + boost::shared_ptr<TargetPhrase> m_target_phrase; HypothesisQueueItem(); @@ -68,11 +69,16 @@ public: HypothesisQueueItem(const size_t hypothesis_pos , const size_t translation_pos , Hypothesis *hypothesis - , BackwardsEdge *edge) + , BackwardsEdge *edge + , const TargetPhrase *target_phrase = NULL) : m_hypothesis_pos(hypothesis_pos) , m_translation_pos(translation_pos) , m_hypothesis(hypothesis) , m_edge(edge) { + if (target_phrase != NULL) + { + m_target_phrase.reset(new TargetPhrase(*target_phrase)); + } } ~HypothesisQueueItem() { @@ -93,6 +99,10 @@ public: BackwardsEdge 
*GetBackwardsEdge() { return m_edge; } + + boost::shared_ptr<TargetPhrase> GetTargetPhrase() { + return m_target_phrase; + } }; //! Allows comparison of two HypothesisQueueItem objects by the corresponding scores. @@ -103,20 +113,26 @@ public: float scoreA = itemA->GetHypothesis()->GetTotalScore(); float scoreB = itemB->GetHypothesis()->GetTotalScore(); - return (scoreA < scoreB); - - /* + if (scoreA < scoreB) { - return true; + return true; } - else if (scoreA < scoreB) + else if (scoreA > scoreB) { - return false; + return false; } else { - return itemA < itemB; - }*/ + // Equal scores: break ties by comparing target phrases (if they exist) + boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase(); + boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase(); + if (!phrA || !phrB) + { + // Fallback: scoreA < scoreB == false, non-deterministic sort + return false; + } + return (phrA->Compare(*phrB) < 0); + } } }; @@ -134,18 +150,6 @@ public: float scoreB = hypoB->GetTotalScore(); return (scoreA > scoreB); - /* - { - return true; - } - else if (scoreA < scoreB) - { - return false; - } - else - { - return hypoA < hypoB; - }*/ } }; @@ -210,13 +214,15 @@ private: BackwardsEdgeSet m_edges; HypothesisQueue m_queue; size_t m_numStackInsertions; + bool m_deterministic; // We always require a corresponding bitmap to be supplied. BitmapContainer(); BitmapContainer(const BitmapContainer &); public: BitmapContainer(const WordsBitmap &bitmap - , HypothesisStackCubePruning &stack); + , HypothesisStackCubePruning &stack + , bool deterministic_sort = false); // The destructor will also delete all the edges that are // connected to this BitmapContainer. 
diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp index 23fc2b01a..ff8b0dafb 100644 --- a/moses/HypothesisStackCubePruning.cpp +++ b/moses/HypothesisStackCubePruning.cpp @@ -39,6 +39,7 @@ HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) : m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled; m_bestScore = -std::numeric_limits<float>::infinity(); m_worstScore = -std::numeric_limits<float>::infinity(); + m_deterministic = manager.options().cube.deterministic_search; } /** remove all hypotheses from the collection */ @@ -148,7 +149,7 @@ void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo) "Should have added hypothesis " << *hypo); const WordsBitmap &bitmap = hypo->GetWordsBitmap(); - m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this); + m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this, m_deterministic); } void HypothesisStackCubePruning::PruneToSize(size_t newSize) @@ -258,7 +259,7 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap BitmapContainer *bmContainer; if (bcExists == m_bitmapAccessor.end()) { - bmContainer = new BitmapContainer(newBitmap, stack); + bmContainer = new BitmapContainer(newBitmap, stack, m_deterministic); m_bitmapAccessor[newBitmap] = bmContainer; } else { bmContainer = bcExists->second; diff --git a/moses/HypothesisStackCubePruning.h b/moses/HypothesisStackCubePruning.h index 6dc973ed3..c8b0d6bef 100644 --- a/moses/HypothesisStackCubePruning.h +++ b/moses/HypothesisStackCubePruning.h @@ -52,6 +52,7 @@ protected: float m_beamWidth; /**< minimum score due to threashold pruning */ size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */ bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */ + bool m_deterministic; /**< flag to determine whether to sort hypotheses deterministically */ /** add hypothesis to stack. Prune if necessary. 
* Returns false if equiv hypo exists in collection, otherwise returns true diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index d0b6d6374..564b2e506 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -116,6 +116,7 @@ Parameter::Parameter() AddParam(cube_opts,"cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)"); AddParam(cube_opts,"cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)"); AddParam(cube_opts,"cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped"); + AddParam(cube_opts,"cube-pruning-deterministic-search", "cbds", "Break ties deterministically during search"); /////////////////////////////////////////////////////////////////////////////////////// // minimum bayes risk decoding diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp index 7219a40f7..bbbfde197 100644 --- a/moses/SearchCubePruning.cpp +++ b/moses/SearchCubePruning.cpp @@ -32,7 +32,14 @@ public: } else if (scoreA > scoreB) { return false; } else { - return A < B; + // Equal scores: break ties by comparing target phrases (if they exist) + boost::shared_ptr<TargetPhrase> phrA = A->Top()->GetTargetPhrase(); + boost::shared_ptr<TargetPhrase> phrB = B->Top()->GetTargetPhrase(); + if (!phrA || !phrB) { + // Fallback: compare pointers, non-deterministic sort + return A < B; + } + return (phrA->Compare(*phrB) < 0); } } }; diff --git a/moses/parameters/CubePruningOptions.cpp b/moses/parameters/CubePruningOptions.cpp index a8710c681..b4ebb0d5e 100644 --- a/moses/parameters/CubePruningOptions.cpp +++ b/moses/parameters/CubePruningOptions.cpp @@ -13,6 +13,7 @@ namespace Moses param.SetParameter(diversity, "cube-pruning-diversity", DEFAULT_CUBE_PRUNING_DIVERSITY); param.SetParameter(lazy_scoring, "cube-pruning-lazy-scoring", false); + param.SetParameter(deterministic_search, "cube-pruning-deterministic-search", false); return true; } 
@@ -30,20 +31,37 @@ namespace Moses if (si != params.end()) diversity = xmlrpc_c::value_int(si->second); si = params.find("cube-pruning-lazy-scoring"); - if (si != params.end()) - { - std::string spec = xmlrpc_c::value_string(si->second); - if (spec == "true" or spec == "on" or spec == "1") - lazy_scoring = true; - else if (spec == "false" or spec == "off" or spec == "0") - lazy_scoring = false; - else + if (si != params.end()) { - char const* msg - = "Error parsing specification for cube-pruning-lazy-scoring"; - xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); + std::string spec = xmlrpc_c::value_string(si->second); + if (spec == "true" or spec == "on" or spec == "1") + lazy_scoring = true; + else if (spec == "false" or spec == "off" or spec == "0") + lazy_scoring = false; + else + { + char const* msg + = "Error parsing specification for cube-pruning-lazy-scoring"; + xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); + } } - } + + si = params.find("cube-pruning-deterministic-search"); + if (si != params.end()) + { + std::string spec = xmlrpc_c::value_string(si->second); + if (spec == "true" or spec == "on" or spec == "1") + deterministic_search = true; + else if (spec == "false" or spec == "off" or spec == "0") + deterministic_search = false; + else + { + char const* msg + = "Error parsing specification for cube-pruning-deterministic-search"; + xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); + } + } + return true; } #endif diff --git a/moses/parameters/CubePruningOptions.h b/moses/parameters/CubePruningOptions.h index 5d27be6a9..961b1a479 100644 --- a/moses/parameters/CubePruningOptions.h +++ b/moses/parameters/CubePruningOptions.h @@ -12,6 +12,7 @@ namespace Moses size_t pop_limit; size_t diversity; bool lazy_scoring; + bool deterministic_search; bool init(Parameter const& param); CubePruningOptions(Parameter const& param); |