diff options
Diffstat (limited to 'moses/src/BitmapContainer.cpp')
-rw-r--r-- | moses/src/BitmapContainer.cpp | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/moses/src/BitmapContainer.cpp b/moses/src/BitmapContainer.cpp new file mode 100644 index 000000000..790550ba9 --- /dev/null +++ b/moses/src/BitmapContainer.cpp @@ -0,0 +1,496 @@ +// $Id: BitmapContainer.cpp 1924 2008-10-30 18:43:17Z hieuhoang1972 $ +// vim:tabstop=2 +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include <algorithm> +#include <limits> +#include <utility> + +#include "BitmapContainer.h" +#include "HypothesisStackCubePruning.h" +#include "DummyScoreProducers.h" +#include "TranslationOptionList.h" + +namespace Moses +{ + +class HypothesisScoreOrdererNoDistortion +{ + public: + bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const + { + const float scoreA = hypoA->GetScore(); + const float scoreB = hypoB->GetScore(); + + if (scoreA > scoreB) + { + return true; + } + else if (scoreA < scoreB) + { + return false; + } + else + { + return hypoA < hypoB; + } + } +}; + +class HypothesisScoreOrdererWithDistortion +{ + public: + static const WordsRange *transOptRange; // TODO. HACK!! + + bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const + { + assert (transOptRange != NULL); + + const float weightDistortion = StaticData::Instance().GetWeightDistortion(); + const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer(); + const float distortionScoreA = dsp->CalculateDistortionScore( + hypoA->GetCurrSourceWordsRange(), + *transOptRange, + hypoA->GetWordsBitmap().GetFirstGapPos() + ); + const float distortionScoreB = dsp->CalculateDistortionScore( + hypoB->GetCurrSourceWordsRange(), + *transOptRange, + hypoB->GetWordsBitmap().GetFirstGapPos() + ); + + const float scoreA = hypoA->GetScore() + distortionScoreA * weightDistortion; + const float scoreB = hypoB->GetScore() + distortionScoreB * weightDistortion; + + if (scoreA > scoreB) + { + return true; + } + else if (scoreA < scoreB) + { + return false; + } + else + { + return hypoA < hypoB; + } + } + +}; + +const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL; + +//////////////////////////////////////////////////////////////////////////////// +// BackwardsEdge Code +//////////////////////////////////////////////////////////////////////////////// + +BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer + , BitmapContainer &parent + , const TranslationOptionList &translations + , const SquareMatrix &futureScore) + : m_initialized(false) + , m_prevBitmapContainer(prevBitmapContainer) + , m_parent(parent) + , m_translations(translations) + , m_futurescore(futureScore) + , m_seenPosition() +{ + + // If either dimension is empty, we haven't got anything to do. + if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) { + VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl); + return; + } + + // Fetch the things we need for distortion cost computation. + int maxDistortion = StaticData::Instance().GetMaxDistortion(); + + if (maxDistortion == -1) { + for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter) + { + m_hypotheses.push_back(*iter); + } + return; + } + + const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange(); + const InputType *itype = StaticData::Instance().GetInput(); + + HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin(); + HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end(); + + while (iterHypo != iterEnd) + { + const Hypothesis &hypo = **iterHypo; + // Special case: If this is the first hypothesis used to seed the search, + // it doesn't have a valid range, and we create the hypothesis, if the + // initial position is not further into the sentence than the distortion limit. + if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0) + { + if (transOptRange.GetStartPos() <= maxDistortion) + m_hypotheses.push_back(&hypo); + } + else + { + int distortionDistance = itype->ComputeDistortionDistance(hypo.GetCurrSourceWordsRange() + , transOptRange); + + if (distortionDistance <= maxDistortion) + m_hypotheses.push_back(&hypo); + } + + ++iterHypo; + } + + if (m_translations.size() > 1) + { + assert(m_translations.Get(0)->GetFutureScore() >= m_translations.Get(1)->GetFutureScore()); + } + + if (m_hypotheses.size() > 1) + { + assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore()); + } + + HypothesisScoreOrdererWithDistortion::transOptRange = &transOptRange; + std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererWithDistortion()); + + // std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion()); +} + +BackwardsEdge::~BackwardsEdge() +{ + m_seenPosition.clear(); + m_hypotheses.clear(); +} + + +void +BackwardsEdge::Initialize() +{ + if(m_hypotheses.size() == 0 || m_translations.size() == 0) + { + m_initialized = true; + return; + } + + Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0)); + m_parent.Enqueue(0, 0, expanded, this); + SetSeenPosition(0, 0); + m_initialized = true; +} + +Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt) +{ + // create hypothesis and calculate all its scores + Hypothesis *newHypo = hypothesis.CreateNext(transOpt, NULL); // TODO FIXME This is absolutely broken - don't pass null here + + // expand hypothesis further if transOpt was linked + std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin(); + std::vector<TranslationOption*>::const_iterator iterEnd = transOpt.GetLinkedTransOpts().end(); + + while (iterLinked != iterEnd) + { + const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap(); + if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) { + // don't want to add a hypothesis that has some but not all of a linked TO set, so return + delete newHypo; + return NULL; + } + else + { + newHypo->CalcScore(m_futurescore); + newHypo = newHypo->CreateNext(**iterLinked, NULL); // TODO FIXME This is absolutely broken - don't pass null here + } + + ++iterLinked; + } + + newHypo->CalcScore(m_futurescore); + + return newHypo; +} + +bool +BackwardsEdge::SeenPosition(const size_t x, const size_t y) +{ + std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y); + return (iter != m_seenPosition.end()); +} + +void +BackwardsEdge::SetSeenPosition(const size_t x, const size_t y) +{ + assert(x < (1<<17)); + assert(y < (1<<17)); + + m_seenPosition.insert((x<<16) + y); +} + + +bool +BackwardsEdge::GetInitialized() +{ + return m_initialized; +} + +const BitmapContainer& +BackwardsEdge::GetBitmapContainer() const +{ + return m_prevBitmapContainer; +} + +void +BackwardsEdge::PushSuccessors(const size_t x, const size_t y) +{ + Hypothesis *newHypo; + + if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) { + SetSeenPosition(x, y + 1); + newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1)); + if(newHypo != NULL) + { + m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this); + } + } + + if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) { + SetSeenPosition(x + 1, y); + newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y)); + if(newHypo != NULL) + { + m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this); + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// BitmapContainer Code +//////////////////////////////////////////////////////////////////////////////// + +BitmapContainer::BitmapContainer(const WordsBitmap &bitmap + , HypothesisStackCubePruning &stack) + : m_bitmap(bitmap) + , m_stack(stack) + , m_numStackInsertions(0) +{ + m_hypotheses = HypothesisSet(); + m_edges = BackwardsEdgeSet(); + m_queue = HypothesisQueue(); +} + +BitmapContainer::~BitmapContainer() +{ + // As we have created the square position objects we clean up now. + HypothesisQueueItem *item = NULL; + + while (!m_queue.empty()) + { + item = m_queue.top(); + FREEHYPO(item->GetHypothesis()); + delete item; + m_queue.pop(); + } + + // Delete all edges. + RemoveAllInColl(m_edges); + + m_hypotheses.clear(); + m_edges.clear(); +} + + +void +BitmapContainer::Enqueue(int hypothesis_pos + , int translation_pos + , Hypothesis *hypothesis + , BackwardsEdge *edge) +{ + HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos + , translation_pos + , hypothesis + , edge); + m_queue.push(item); +} + +HypothesisQueueItem* +BitmapContainer::Dequeue(bool keepValue) +{ + if (!m_queue.empty()) + { + HypothesisQueueItem *item = m_queue.top(); + + if (!keepValue) + { + m_queue.pop(); + } + + return item; + } + + return NULL; +} + +HypothesisQueueItem* +BitmapContainer::Top() const +{ + return m_queue.top(); +} + +size_t +BitmapContainer::Size() +{ + return m_queue.size(); +} + +bool +BitmapContainer::Empty() const +{ + return m_queue.empty(); +} + + +const WordsBitmap& +BitmapContainer::GetWordsBitmap() +{ + return m_bitmap; +} + +const HypothesisSet& +BitmapContainer::GetHypotheses() const +{ + return m_hypotheses; +} + +size_t +BitmapContainer::GetHypothesesSize() const +{ + return m_hypotheses.size(); +} + +const BackwardsEdgeSet& +BitmapContainer::GetBackwardsEdges() +{ + return m_edges; +} + +void +BitmapContainer::AddHypothesis(Hypothesis *hypothesis) +{ + bool itemExists = false; + HypothesisSet::const_iterator iter = m_hypotheses.begin(); + HypothesisSet::const_iterator iterEnd = m_hypotheses.end(); + + // cfedermann: do we actually need this check? + while (iter != iterEnd) + { + if (*iter == hypothesis) { + itemExists = true; + break; + } + + ++iter; + } + assert(itemExists == false); + m_hypotheses.push_back(hypothesis); +} + +void +BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge) +{ + m_edges.insert(edge); +} + +void +BitmapContainer::InitializeEdges() +{ + BackwardsEdgeSet::iterator iter = m_edges.begin(); + BackwardsEdgeSet::iterator iterEnd = m_edges.end(); + + while (iter != iterEnd) + { + BackwardsEdge *edge = *iter; + edge->Initialize(); + + ++iter; + } +} + +void +BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps) +{ + while ((!Empty()) && m_numStackInsertions < minNumHyps) + { + ProcessBestHypothesis(); + } +} + +void +BitmapContainer::ProcessBestHypothesis() +{ + if (m_queue.empty()) + { + return; + } + + // Get the currently best hypothesis from the queue. + HypothesisQueueItem *item = Dequeue(); + + // If the priority queue is exhausted, we are done and should have exited + assert(item != NULL); + + // check we are pulling things off of priority queue in right order + if (!Empty()) + { + HypothesisQueueItem *check = Dequeue(true); + assert(item->GetHypothesis()->GetTotalScore() >= check->GetHypothesis()->GetTotalScore()); + } + + // Logging for the criminally insane + IFVERBOSE(3) { + // const StaticData &staticData = StaticData::Instance(); + item->GetHypothesis()->PrintHypothesis(); + } + + // Add best hypothesis to hypothesis stack. + const bool newstackentry = m_stack.AddPrune(item->GetHypothesis()); + if (newstackentry) + m_numStackInsertions++; + + IFVERBOSE(3) { + TRACE_ERR("new stack entry flag is " << newstackentry << std::endl); + } + + // Create new hypotheses for the two successors of the hypothesis just added. + item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos()); + + // We are done with the queue item, we delete it. + delete item; +} + +void +BitmapContainer::SortHypotheses() +{ + std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer()); +} + +} + |