/*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2014- University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef MERT_HYPERGRAPH_H #define MERT_HYPERGRAPH_H #include #include #include #include #include #include #include "util/exception.hh" #include "util/file_piece.hh" #include "util/murmur_hash.hh" #include "util/pool.hh" #include "util/string_piece.hh" #include "FeatureStats.h" namespace MosesTuning { typedef unsigned int WordIndex; const WordIndex kMaxWordIndex = UINT_MAX; const FeatureStatsType kMinScore = -1e10; template class FixedAllocator : boost::noncopyable { public: FixedAllocator() : current_(NULL), end_(NULL) {} void Init(std::size_t count) { assert(!current_); array_.reset(new T[count]); current_ = array_.get(); end_ = current_ + count; } T &operator[](std::size_t idx) { return array_.get()[idx]; } const T &operator[](std::size_t idx) const { return array_.get()[idx]; } T *New() { T *ret = current_++; UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end"); return ret; } std::size_t Capacity() const { return end_ - array_.get(); } std::size_t Size() const { return current_ - array_.get(); } private: boost::scoped_array array_; T *current_, *end_; }; class Vocab { public: Vocab(); typedef std::pair Entry; const Entry &FindOrAdd(const StringPiece &str); const Entry& Bos() const { return bos_; } const Entry& Eos() const { return eos_; } private: util::Pool piece_backing_; struct Hash : public std::unary_function { std::size_t operator()(StringPiece str) const { return util::MurmurHashNative(str.data(), str.size()); } }; struct Equals : public std::binary_function { bool operator()(StringPiece first, StringPiece second) const { return first == second; } }; typedef boost::unordered_map Map; Map map_; Entry eos_; Entry bos_; }; typedef std::vector WordVec; class Vertex; //Use shared pointer to save copying when we prune typedef boost::shared_ptr FeaturePtr; /** * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector. **/ class Edge { public: Edge() { features_.reset(new SparseVector()); } void AddWord(const Vocab::Entry *word) { words_.push_back(word); } void AddChild(size_t child) { children_.push_back(child); } void AddFeature(const StringPiece& name, FeatureStatsType value) { //TODO StringPiece interface features_->set(name.as_string(),value); } const WordVec &Words() const { return words_; } const FeaturePtr& Features() const { return features_; } void SetFeatures(const FeaturePtr& features) { features_ = features; } const std::vector& Children() const { return children_; } FeatureStatsType GetScore(const SparseVector& weights) const { return inner_product(*(features_.get()), weights); } private: // NULL for non-terminals. std::vector words_; std::vector children_; boost::shared_ptr features_; }; /* * A vertex has 0..n incoming edges **/ class Vertex { public: Vertex() : sourceCovered_(0) {} void AddEdge(const Edge* edge) { incoming_.push_back(edge); } void SetSourceCovered(size_t sourceCovered) { sourceCovered_ = sourceCovered; } const std::vector& GetIncoming() const { return incoming_; } size_t SourceCovered() const { return sourceCovered_; } private: std::vector incoming_; size_t sourceCovered_; }; class Graph : boost::noncopyable { public: Graph(Vocab& vocab) : vocab_(vocab) {} void SetCounts(std::size_t vertices, std::size_t edges) { vertices_.Init(vertices); edges_.Init(edges); } Vocab &MutableVocab() { return vocab_; } Edge *NewEdge() { return edges_.New(); } Vertex *NewVertex() { return vertices_.New(); } const Vertex &GetVertex(std::size_t index) const { return vertices_[index]; } Edge &GetEdge(std::size_t index) { return edges_[index]; } /* Created a pruned copy of this graph with minEdgeCount edges. Uses the scores in the max-product semiring to rank edges, as suggested by Colin Cherry */ void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const; std::size_t VertexSize() const { return vertices_.Size(); } std::size_t EdgeSize() const { return edges_.Size(); } bool IsBoundary(const Vocab::Entry* word) const { return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second; } private: FixedAllocator edges_; FixedAllocator vertices_; Vocab& vocab_; }; class HypergraphException : public util::Exception { public: HypergraphException() {} ~HypergraphException() throw() {} }; void ReadGraph(util::FilePiece &from, Graph &graph); }; #endif