diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2012-11-12 23:56:18 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2012-11-12 23:56:18 +0400 |
commit | 5e3ef23cef6101d2c098eb3445f562e8f595655b (patch) | |
tree | b8c332b6fa82bae84ea4910967a10ba1b08a7107 /moses/HypothesisStackCubePruning.cpp | |
parent | 8c785cff2b1be3cccd76ea9026f71b649762dfc3 (diff) |
move moses/src/* to moses/
Diffstat (limited to 'moses/HypothesisStackCubePruning.cpp')
-rw-r--r-- | moses/HypothesisStackCubePruning.cpp | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp new file mode 100644 index 000000000..ca54bf944 --- /dev/null +++ b/moses/HypothesisStackCubePruning.cpp @@ -0,0 +1,296 @@ +// $Id$ + +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include <algorithm> +#include <set> +#include <queue> +#include "HypothesisStackCubePruning.h" +#include "TypeDef.h" +#include "Util.h" +#include "StaticData.h" +#include "Manager.h" + +using namespace std; + +namespace Moses +{ +HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) : + HypothesisStack(manager) +{ + m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled(); + m_bestScore = -std::numeric_limits<float>::infinity(); + m_worstScore = -std::numeric_limits<float>::infinity(); +} + +/** remove all hypotheses from the collection */ +void HypothesisStackCubePruning::RemoveAll() +{ + // delete all bitmap accessors; + _BMType::iterator iter; + for (iter = m_bitmapAccessor.begin(); iter != m_bitmapAccessor.end(); ++iter) { + delete iter->second; + } +} + +pair<HypothesisStackCubePruning::iterator, bool> HypothesisStackCubePruning::Add(Hypothesis *hypo) +{ + std::pair<iterator, bool> ret = m_hypos.insert(hypo); + + if (ret.second) { + // equiv hypo doesn't exists + VERBOSE(3,"added hyp to stack"); + + // Update best score, if this hypothesis is new best + if (hypo->GetTotalScore() > m_bestScore) { + VERBOSE(3,", best on stack"); + m_bestScore = hypo->GetTotalScore(); + // this may also affect the worst score + if ( m_bestScore + m_beamWidth > m_worstScore ) + m_worstScore = m_bestScore + m_beamWidth; + } + + // Prune only if stack is twice as big as needed (lazy pruning) + VERBOSE(3,", now size " << m_hypos.size()); + if (m_hypos.size() > 2*m_maxHypoStackSize-1) { + PruneToSize(m_maxHypoStackSize); + } else { + VERBOSE(3,std::endl); + } + } + + return ret; +} + +bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo) +{ + if (hypo->GetTotalScore() < m_worstScore) { + // too bad for stack. don't bother adding hypo into collection + m_manager.GetSentenceStats().AddDiscarded(); + VERBOSE(3,"discarded, too bad for stack" << std::endl); + FREEHYPO(hypo); + return false; + } + + // over threshold, try to add to collection + std::pair<iterator, bool> addRet = Add(hypo); + if (addRet.second) { + // nothing found. add to collection + return true; + } + + // equiv hypo exists, recombine with other hypo + iterator &iterExisting = addRet.first; + Hypothesis *hypoExisting = *iterExisting; + CHECK(iterExisting != m_hypos.end()); + + m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting); + + // found existing hypo with same target ending. + // keep the best 1 + if (hypo->GetTotalScore() > hypoExisting->GetTotalScore()) { + // incoming hypo is better than the one we have + VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, "); + if (m_nBestIsEnabled) { + hypo->AddArc(hypoExisting); + Detach(iterExisting); + } else { + Remove(iterExisting); + } + + bool added = Add(hypo).second; + if (!added) { + iterExisting = m_hypos.find(hypo); + TRACE_ERR("Offending hypo = " << **iterExisting << endl); + CHECK(false); + } + return false; + } else { + // already storing the best hypo. discard current hypo + VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl) + if (m_nBestIsEnabled) { + hypoExisting->AddArc(hypo); + } else { + FREEHYPO(hypo); + } + return false; + } +} + +void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo) +{ + std::pair<iterator, bool> addRet = Add(hypo); + CHECK(addRet.second); + + const WordsBitmap &bitmap = hypo->GetWordsBitmap(); + m_bitmapAccessor[bitmap] = new BitmapContainer(bitmap, *this); +} + +void HypothesisStackCubePruning::PruneToSize(size_t newSize) +{ + if (m_hypos.size() > newSize) { // ok, if not over the limit + priority_queue<float> bestScores; + + // push all scores to a heap + // (but never push scores below m_bestScore+m_beamWidth) + iterator iter = m_hypos.begin(); + float score = 0; + while (iter != m_hypos.end()) { + Hypothesis *hypo = *iter; + score = hypo->GetTotalScore(); + if (score > m_bestScore+m_beamWidth) { + bestScores.push(score); + } + ++iter; + } + + // pop the top newSize scores (and ignore them, these are the scores of hyps that will remain) + // ensure to never pop beyond heap size + size_t minNewSizeHeapSize = newSize > bestScores.size() ? bestScores.size() : newSize; + for (size_t i = 1 ; i < minNewSizeHeapSize ; i++) + bestScores.pop(); + + // and remember the threshold + float scoreThreshold = bestScores.top(); + + // delete all hypos under score threshold + iter = m_hypos.begin(); + while (iter != m_hypos.end()) { + Hypothesis *hypo = *iter; + float score = hypo->GetTotalScore(); + if (score < scoreThreshold) { + iterator iterRemove = iter++; + Remove(iterRemove); + m_manager.GetSentenceStats().AddPruning(); + } else { + ++iter; + } + } + VERBOSE(3,", pruned to size " << size() << endl); + + IFVERBOSE(3) { + TRACE_ERR("stack now contains: "); + for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) { + Hypothesis *hypo = *iter; + TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") "); + } + TRACE_ERR( endl); + } + + // set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack + m_worstScore = scoreThreshold; + } +} + +const Hypothesis *HypothesisStackCubePruning::GetBestHypothesis() const +{ + if (!m_hypos.empty()) { + const_iterator iter = m_hypos.begin(); + Hypothesis *bestHypo = *iter; + while (++iter != m_hypos.end()) { + Hypothesis *hypo = *iter; + if (hypo->GetTotalScore() > bestHypo->GetTotalScore()) + bestHypo = hypo; + } + return bestHypo; + } + return NULL; +} + +vector<const Hypothesis*> HypothesisStackCubePruning::GetSortedList() const +{ + vector<const Hypothesis*> ret; + ret.reserve(m_hypos.size()); + std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end())); + sort(ret.begin(), ret.end(), CompareHypothesisTotalScore()); + + return ret; +} + + +void HypothesisStackCubePruning::CleanupArcList() +{ + // only necessary if n-best calculations are enabled + if (!m_nBestIsEnabled) return; + + iterator iter; + for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { + Hypothesis *mainHypo = *iter; + mainHypo->CleanupArcList(); + } +} + +void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap + , HypothesisStackCubePruning &stack + , const WordsRange &/*range*/ + , BitmapContainer &bitmapContainer + , const SquareMatrix &futureScore + , const TranslationOptionList &transOptList) +{ + _BMType::iterator bcExists = m_bitmapAccessor.find(newBitmap); + + BitmapContainer *bmContainer; + if (bcExists == m_bitmapAccessor.end()) { + bmContainer = new BitmapContainer(newBitmap, stack); + m_bitmapAccessor[newBitmap] = bmContainer; + } else { + bmContainer = bcExists->second; + } + + BackwardsEdge *edge = new BackwardsEdge(bitmapContainer + , *bmContainer + , transOptList + , futureScore, + m_manager.GetSource(), + m_manager.GetTranslationSystem()); + bmContainer->AddBackwardsEdge(edge); +} + + +TO_STRING_BODY(HypothesisStackCubePruning); + + +// friend +std::ostream& operator<<(std::ostream& out, const HypothesisStackCubePruning& hypoColl) +{ + HypothesisStackCubePruning::const_iterator iter; + + for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) { + const Hypothesis &hypo = **iter; + out << hypo << endl; + + } + return out; +} + +void +HypothesisStackCubePruning::AddHypothesesToBitmapContainers() +{ + HypothesisStackCubePruning::const_iterator iter; + for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { + Hypothesis *h = *iter; + const WordsBitmap &bitmap = h->GetWordsBitmap(); + BitmapContainer *container = m_bitmapAccessor[bitmap]; + container->AddHypothesis(h); + } +} + +} + |