diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2016-08-10 23:40:05 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2016-08-10 23:40:05 +0300 |
commit | 09c492415451812153a06a7ef26faf4bdbc0c5fc (patch) | |
tree | 557690f4f2012f59f74ecf4b1d53e9d93bbc2b64 /contrib/moses2/defer/CubePruningPerBitmap | |
parent | a4df68a9fa5078f515c776a33328b4f3667d0391 (diff) |
move moses2 to contrib
Diffstat (limited to 'contrib/moses2/defer/CubePruningPerBitmap')
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp | 161 | ||||
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Misc.h | 113 | ||||
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Search.cpp | 273 | ||||
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Search.h | 66 | ||||
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp | 72 | ||||
-rw-r--r-- | contrib/moses2/defer/CubePruningPerBitmap/Stacks.h | 51 |
6 files changed, 736 insertions, 0 deletions
diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp new file mode 100644 index 000000000..7b324e244 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Misc.cpp @@ -0,0 +1,161 @@ +/* + * CubePruning.cpp + * + * Created on: 27 Nov 2015 + * Author: hieu + */ + +#include "Misc.h" +#include "../Manager.h" +#include "../../MemPool.h" +#include "../../System.h" + +using namespace std; + +namespace Moses2 +{ + +namespace NSCubePruningPerBitmap +{ + +//////////////////////////////////////////////////////////////////////// +QueueItem *QueueItem::Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque<QueueItem*> &queueItemRecycler) +{ + QueueItem *ret; + if (currItem) { + // reuse incoming queue item to create new item + ret = currItem; + ret->Init(mgr, edge, hypoIndex, tpIndex); + } + else if (!queueItemRecycler.empty()) { + // use item from recycle bin + ret = queueItemRecycler.back(); + ret->Init(mgr, edge, hypoIndex, tpIndex); + queueItemRecycler.pop_back(); + } + else { + // create new item + ret = new (mgr.GetPool().Allocate<QueueItem>()) QueueItem(mgr, edge, hypoIndex, tpIndex); + } + + return ret; +} + +QueueItem::QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) +:edge(&edge) +,hypoIndex(hypoIndex) +,tpIndex(tpIndex) +{ + CreateHypothesis(mgr); +} + +void QueueItem::Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex) +{ + this->edge = &edge; + this->hypoIndex = hypoIndex; + this->tpIndex = tpIndex; + + CreateHypothesis(mgr); +} + +void QueueItem::CreateHypothesis(Manager &mgr) +{ + const Hypothesis *prevHypo = edge->miniStack.GetSortedAndPruneHypos(mgr)[hypoIndex]; + const TargetPhrase &tp = edge->tps[tpIndex]; + + //cerr << "hypoIndex=" << hypoIndex << endl; + //cerr << "edge.hypos=" << edge.hypos.size() << endl; + //cerr << prevHypo << endl; + //cerr << *prevHypo << endl; + + hypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + hypo->Init(mgr, *prevHypo, edge->path, tp, edge->newBitmap, edge->estimatedScore); + hypo->EvaluateWhenApplied(); +} + +//////////////////////////////////////////////////////////////////////// +CubeEdge::CubeEdge( + Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap) +:miniStack(miniStack) +,path(path) +,tps(tps) +,newBitmap(newBitmap) +{ + estimatedScore = mgr.GetEstimatedScores().CalcEstimatedScore(newBitmap); +} + +std::ostream& operator<<(std::ostream &out, const CubeEdge &obj) +{ + out << obj.newBitmap; + return out; +} + +bool +CubeEdge::SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const +{ + //UTIL_THROW_IF2(x >= (1<<17), "Error"); + //UTIL_THROW_IF2(y >= (1<<17), "Error"); + + SeenPositionItem val(this, (x<<16) + y); + std::pair<SeenPositions::iterator, bool> pairRet = seenPositions.insert(val); + return pairRet.second; +} + +void CubeEdge::CreateFirst(Manager &mgr, + Queue &queue, + SeenPositions &seenPositions, + std::deque<QueueItem*> &queueItemRecycler) +{ + if (miniStack.GetSortedAndPruneHypos(mgr).size()) { + assert(tps.GetSize()); + + QueueItem *item = QueueItem::Create(NULL, mgr, *this, 0, 0, queueItemRecycler); + queue.push(item); + bool setSeen = SetSeenPosition(0, 0, seenPositions); + assert(setSeen); + } +} + +void CubeEdge::CreateNext(Manager &mgr, + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque<QueueItem*> &queueItemRecycler) +{ + size_t hypoIndex = item->hypoIndex; + size_t tpIndex = item->tpIndex; + + if (hypoIndex + 1 < miniStack.GetSortedAndPruneHypos(mgr).size() && SetSeenPosition(hypoIndex + 1, tpIndex, seenPositions)) { + // reuse incoming queue item to create new item + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex + 1, tpIndex, queueItemRecycler); + assert(newItem == item); + queue.push(newItem); + item = NULL; + } + + if (tpIndex + 1 < tps.GetSize() && SetSeenPosition(hypoIndex, tpIndex + 1, seenPositions)) { + QueueItem *newItem = QueueItem::Create(item, mgr, *this, hypoIndex, tpIndex + 1, queueItemRecycler); + queue.push(newItem); + item = NULL; + } + + if (item) { + // recycle unused queue item + queueItemRecycler.push_back(item); + } +} + +} + +} + + diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Misc.h b/contrib/moses2/defer/CubePruningPerBitmap/Misc.h new file mode 100644 index 000000000..77b5ba9c3 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Misc.h @@ -0,0 +1,113 @@ +/* + * CubePruning.h + * + * Created on: 27 Nov 2015 + * Author: hieu + */ +#pragma once +#include <boost/pool/pool_alloc.hpp> +#include <boost/unordered_map.hpp> +#include <boost/unordered_set.hpp> +#include <vector> +#include <queue> +#include "../../legacy/Range.h" +#include "../Hypothesis.h" +#include "../../TypeDef.h" +#include "../../Vector.h" +#include "../CubePruningMiniStack/Stack.h" + +namespace Moses2 +{ + +class Manager; +class InputPath; +class TargetPhrases; +class Bitmap; + +namespace NSCubePruningPerBitmap +{ +class CubeEdge; + +/////////////////////////////////////////// +class QueueItem +{ + ~QueueItem(); // NOT IMPLEMENTED. Use MemPool +public: + static QueueItem *Create(QueueItem *currItem, + Manager &mgr, + CubeEdge &edge, + size_t hypoIndex, + size_t tpIndex, + std::deque<QueueItem*> &queueItemRecycler); + QueueItem(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + + void Init(Manager &mgr, CubeEdge &edge, size_t hypoIndex, size_t tpIndex); + + CubeEdge *edge; + size_t hypoIndex, tpIndex; + Hypothesis *hypo; + +protected: + void CreateHypothesis(Manager &mgr); +}; + +/////////////////////////////////////////// +class QueueItemOrderer +{ +public: + bool operator()(QueueItem* itemA, QueueItem* itemB) const { + HypothesisFutureScoreOrderer orderer; + return !orderer(itemA->hypo, itemB->hypo); + } +}; + +/////////////////////////////////////////// +class CubeEdge +{ + friend std::ostream& operator<<(std::ostream &, const CubeEdge &); + +public: + typedef std::priority_queue<QueueItem*, + std::vector<QueueItem*>, + QueueItemOrderer> Queue; + + typedef std::pair<const CubeEdge*, int> SeenPositionItem; + typedef boost::unordered_set<SeenPositionItem, + boost::hash<SeenPositionItem>, + std::equal_to<SeenPositionItem> + > SeenPositions; + + const NSCubePruningMiniStack::MiniStack &miniStack; + const InputPath &path; + const TargetPhrases &tps; + const Bitmap &newBitmap; + SCORE estimatedScore; + + CubeEdge(Manager &mgr, + const NSCubePruningMiniStack::MiniStack &miniStack, + const InputPath &path, + const TargetPhrases &tps, + const Bitmap &newBitmap); + + bool SetSeenPosition(const size_t x, const size_t y, SeenPositions &seenPositions) const; + + void CreateFirst(Manager &mgr, + Queue &queue, + SeenPositions &seenPositions, + std::deque<QueueItem*> &queueItemRecycler); + void CreateNext(Manager &mgr, + QueueItem *item, + Queue &queue, + SeenPositions &seenPositions, + std::deque<QueueItem*> &queueItemRecycler); + + +protected: + +}; + +} + +} + + diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp new file mode 100644 index 000000000..b0eddcc21 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Search.cpp @@ -0,0 +1,273 @@ +/* + * Search.cpp + * + * Created on: 16 Nov 2015 + * Author: hieu + */ +#include <boost/foreach.hpp> +#include "Search.h" +#include "../Manager.h" +#include "../Hypothesis.h" +#include "../../InputPaths.h" +#include "../../InputPath.h" +#include "../../System.h" +#include "../../Sentence.h" +#include "../../TranslationTask.h" +#include "../../legacy/Util2.h" + +using namespace std; + +namespace Moses2 +{ + +namespace NSCubePruningPerBitmap +{ + +//////////////////////////////////////////////////////////////////////// +Search::Search(Manager &mgr) +:Moses2::Search(mgr) +,m_stacks(mgr) + +,m_queue(QueueItemOrderer(), + std::vector<QueueItem*>() ) + +,m_seenPositions() +{ +} + +Search::~Search() +{ +} + +void Search::Decode() +{ + // init stacks + m_stacks.Init(mgr.GetInput().GetSize() + 1); + + const Bitmap &initBitmap = mgr.GetBitmaps().GetInitialBitmap(); + Hypothesis *initHypo = Hypothesis::Create(mgr.GetSystemPool(), mgr); + initHypo->Init(mgr, mgr.GetInputPaths().GetBlank(), mgr.GetInitPhrase(), initBitmap); + initHypo->EmptyHypothesisState(mgr.GetInput()); + + m_stacks.Add(initHypo, mgr.GetHypoRecycle()); + + for (size_t stackInd = 0; stackInd < m_stacks.GetSize() - 1; ++stackInd) { + CreateSearchGraph(stackInd); + } + + for (size_t stackInd = 1; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + Decode(stackInd); + + //cerr << m_stacks << endl; + } + + //DebugCounts(); +} + +void Search::Decode(size_t stackInd) +{ + NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; + + // FOR EACH BITMAP IN EACH STACK + boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> > uniqueBM; + + BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + + const Bitmap *bitmap = val.first.first; + uniqueBM[bitmap].push_back(&miniStack); + } + + // decode each bitmap + boost::unordered_map<const Bitmap*, vector<NSCubePruningMiniStack::MiniStack*> >::iterator iter; + for (iter = uniqueBM.begin(); iter != uniqueBM.end(); ++iter) { + const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks = iter->second; + Decode(miniStacks); + } + + /* + // FOR EACH STACK + vector<NSCubePruningMiniStack::MiniStack*> miniStacks; + BOOST_FOREACH(NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + + miniStacks.push_back(&miniStack); + } + Decode(miniStacks); + */ +} + +void Search::Decode(const vector<NSCubePruningMiniStack::MiniStack*> &miniStacks) +{ + Recycler<Hypothesis*> &hypoRecycler = mgr.GetHypoRecycle(); + + // reuse queue from previous stack. Clear it first + std::vector<QueueItem*> &container = Container(m_queue); + //cerr << "container=" << container.size() << endl; + BOOST_FOREACH(QueueItem *item, container) { + // recycle unused hypos from queue + Hypothesis *hypo = item->hypo; + hypoRecycler.Recycle(hypo); + + // recycle queue item + m_queueItemRecycler.push_back(item); + } + container.clear(); + + m_seenPositions.clear(); + + BOOST_FOREACH(NSCubePruningMiniStack::MiniStack *miniStack, miniStacks) { + // add top hypo from every edge into queue + CubeEdges &edges = *m_cubeEdges[miniStack]; + + BOOST_FOREACH(CubeEdge *edge, edges) { + //cerr << "edge=" << *edge << endl; + edge->CreateFirst(mgr, m_queue, m_seenPositions, m_queueItemRecycler); + } + } + + size_t pops = 0; + while (!m_queue.empty() && pops < mgr.system.popLimit) { + // get best hypo from queue, add to stack + //cerr << "queue=" << queue.size() << endl; + QueueItem *item = m_queue.top(); + m_queue.pop(); + + CubeEdge *edge = item->edge; + + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, hypoRecycler); + + edge->CreateNext(mgr, item, m_queue, m_seenPositions, m_queueItemRecycler); + + ++pops; + } + + /* + // create hypo from every edge. Increase diversity + while (!m_queue.empty()) { + QueueItem *item = m_queue.top(); + m_queue.pop(); + + if (item->hypoIndex == 0 && item->tpIndex == 0) { + CubeEdge &edge = item->edge; + + // add hypo to stack + Hypothesis *hypo = item->hypo; + //cerr << "hypo=" << *hypo << " " << hypo->GetBitmap() << endl; + m_stacks.Add(hypo, mgr.GetHypoRecycle()); + } + } + */ +} + + +void Search::CreateSearchGraph(size_t stackInd) +{ + NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; + MemPool &pool = mgr.GetPool(); + + BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + const Bitmap &hypoBitmap = *val.first.first; + size_t hypoEndPos = val.first.second; + //cerr << "key=" << hypoBitmap << " " << hypoEndPos << endl; + + // create edges to next hypos from existing hypos + const InputPaths &paths = mgr.GetInputPaths(); + + BOOST_FOREACH(const InputPath *path, paths) { + const Range &pathRange = path->range; + //cerr << "pathRange=" << pathRange << endl; + + if (!path->IsUsed()) { + continue; + } + if (!CanExtend(hypoBitmap, hypoEndPos, pathRange)) { + continue; + } + + const Bitmap &newBitmap = mgr.GetBitmaps().GetBitmap(hypoBitmap, pathRange); + + // sort hypo for a particular bitmap and hypoEndPos + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + + + // add cube edge + size_t numPt = mgr.system.mappings.size(); + for (size_t i = 0; i < numPt; ++i) { + const TargetPhrases *tps = path->targetPhrases[i]; + if (tps && tps->GetSize()) { + // create next mini stack + NSCubePruningMiniStack::MiniStack &nextMiniStack = m_stacks.GetMiniStack(newBitmap, pathRange); + + CubeEdge *edge = new (pool.Allocate<CubeEdge>()) CubeEdge(mgr, miniStack, *path, *tps, newBitmap); + + CubeEdges *edges; + boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*>::iterator iter = m_cubeEdges.find(&nextMiniStack); + if (iter == m_cubeEdges.end()) { + edges = new (pool.Allocate<CubeEdges>()) CubeEdges(); + m_cubeEdges[&nextMiniStack] = edges; + } + else { + edges = iter->second; + } + + edges->push_back(edge); + } + } + } + } + +} + + +const Hypothesis *Search::GetBestHypo() const +{ + const NSCubePruningMiniStack::Stack &lastStack = m_stacks.Back(); + std::vector<const Hypothesis*> sortedHypos = lastStack.GetBestHypos(1); + + const Hypothesis *best = NULL; + if (sortedHypos.size()) { + best = sortedHypos[0]; + } + return best; +} + +void Search::DebugCounts() +{ + std::map<size_t, size_t> counts; + + for (size_t stackInd = 0; stackInd < m_stacks.GetSize(); ++stackInd) { + //cerr << "stackInd=" << stackInd << endl; + const NSCubePruningMiniStack::Stack &stack = m_stacks[stackInd]; + BOOST_FOREACH(const NSCubePruningMiniStack::Stack::Coll::value_type &val, stack.GetColl()) { + const NSCubePruningMiniStack::MiniStack &miniStack = *val.second; + size_t count = miniStack.GetColl().size(); + + if (counts.find(count) == counts.end()) { + counts[count] = 0; + } + else { + ++counts[count]; + } + } + //cerr << m_stacks << endl; + } + + std::map<size_t, size_t>::const_iterator iter; + for (iter = counts.begin(); iter != counts.end(); ++iter) { + cerr << iter->first << "=" << iter->second << " "; + } + cerr << endl; +} + + + +} + +} + + diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Search.h b/contrib/moses2/defer/CubePruningPerBitmap/Search.h new file mode 100644 index 000000000..913095e25 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Search.h @@ -0,0 +1,66 @@ +/* + * Search.h + * + * Created on: 16 Nov 2015 + * Author: hieu + */ + +#pragma once +#include <boost/pool/pool_alloc.hpp> +#include <boost/unordered_map.hpp> +#include "../Search.h" +#include "Misc.h" +#include "Stacks.h" +#include "../../legacy/Range.h" + +namespace Moses2 +{ + +class Bitmap; +class Hypothesis; +class InputPath; +class TargetPhrases; + +namespace NSCubePruningMiniStack +{ +class MiniStack; +} + +namespace NSCubePruningPerBitmap +{ + +class Search : public Moses2::Search +{ +public: + Search(Manager &mgr); + virtual ~Search(); + + virtual void Decode(); + const Hypothesis *GetBestHypo() const; + +protected: + Stacks m_stacks; + + CubeEdge::Queue m_queue; + CubeEdge::SeenPositions m_seenPositions; + + // CUBE PRUNING VARIABLES + // setup + typedef std::vector<CubeEdge*> CubeEdges; + boost::unordered_map<NSCubePruningMiniStack::MiniStack*, CubeEdges*> m_cubeEdges; + + std::deque<QueueItem*> m_queueItemRecycler; + + // CUBE PRUNING + // decoding + void CreateSearchGraph(size_t stackInd); + void Decode(size_t stackInd); + void Decode(const std::vector<NSCubePruningMiniStack::MiniStack*> &miniStacks); + + void DebugCounts(); +}; + +} + +} + diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp new file mode 100644 index 000000000..ca29f52c0 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.cpp @@ -0,0 +1,72 @@ +/* + * Stacks.cpp + * + * Created on: 6 Nov 2015 + * Author: hieu + */ + +#include "Stacks.h" +#include "../../System.h" +#include "../Manager.h" + +using namespace std; + +namespace Moses2 +{ + +namespace NSCubePruningPerBitmap +{ + +Stacks::Stacks(const Manager &mgr) +:m_mgr(mgr) +{ +} + +Stacks::~Stacks() +{ +} + +void Stacks::Init(size_t numStacks) +{ + m_stacks.resize(numStacks); + for (size_t i = 0; i < m_stacks.size(); ++i) { + m_stacks[i] = new (m_mgr.GetPool().Allocate<NSCubePruningMiniStack::Stack>()) NSCubePruningMiniStack::Stack(m_mgr); + } +} + + +std::ostream& operator<<(std::ostream &out, const Stacks &obj) +{ + for (size_t i = 0; i < obj.GetSize(); ++i) { + const NSCubePruningMiniStack::Stack &stack = *obj.m_stacks[i]; + out << stack.GetHypoSize() << " "; + } + + return out; +} + +void Stacks::Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle) +{ + size_t numWordsCovered = hypo->GetBitmap().GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + stack.Add(hypo, hypoRecycle); + +} + +NSCubePruningMiniStack::MiniStack &Stacks::GetMiniStack(const Bitmap &newBitmap, const Range &pathRange) +{ + size_t numWordsCovered = newBitmap.GetNumWordsCovered(); + //cerr << "numWordsCovered=" << numWordsCovered << endl; + NSCubePruningMiniStack::Stack &stack = *m_stacks[numWordsCovered]; + + NSCubePruningMiniStack::Stack::HypoCoverage key(&newBitmap, pathRange.GetEndPos()); + stack.GetMiniStack(key); + +} + +} + +} + + diff --git a/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h new file mode 100644 index 000000000..5729fa613 --- /dev/null +++ b/contrib/moses2/defer/CubePruningPerBitmap/Stacks.h @@ -0,0 +1,51 @@ +/* + * Stacks.h + * + * Created on: 6 Nov 2015 + * Author: hieu + */ + +#pragma once + +#include <vector> +#include "../CubePruningMiniStack/Stack.h" +#include "../../Recycler.h" + +namespace Moses2 +{ +class Manager; + +namespace NSCubePruningPerBitmap +{ + +class Stacks { + friend std::ostream& operator<<(std::ostream &, const Stacks &); +public: + Stacks(const Manager &mgr); + virtual ~Stacks(); + + void Init(size_t numStacks); + + size_t GetSize() const + { return m_stacks.size(); } + + const NSCubePruningMiniStack::Stack &Back() const + { return *m_stacks.back(); } + + NSCubePruningMiniStack::Stack &operator[](size_t ind) + { return *m_stacks[ind]; } + + void Add(const Hypothesis *hypo, Recycler<Hypothesis*> &hypoRecycle); + NSCubePruningMiniStack::MiniStack &GetMiniStack(const Bitmap &newBitmap, const Range &pathRange); + +protected: + const Manager &m_mgr; + std::vector<NSCubePruningMiniStack::Stack*> m_stacks; +}; + + +} + +} + + |