github.com/moses-smt/mosesdecoder.git
author     Barry Haddow <barry.haddow@gmail.com>  2014-07-21 14:04:43 +0400
committer  Barry Haddow <barry.haddow@gmail.com>  2014-07-21 14:04:43 +0400
commit     efee2695c31e1086af783c1b092fc842fb7bb1a4 (patch)
tree       e8324ea35cc92f0737c93f6b8fa8e23898e4d78a /mert/ForestRescore.h
parent     c83c5a3ee6f3ef7480e7a782d2023af9e99c1711 (diff)
Merge 08811deb17337356cd8dae9c59c0160590679a35 from joshua
Diffstat (limited to 'mert/ForestRescore.h')
-rw-r--r--  mert/ForestRescore.h  120
1 file changed, 120 insertions, 0 deletions
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
new file mode 100644
index 000000000..900275b74
--- /dev/null
+++ b/mert/ForestRescore.h
@@ -0,0 +1,120 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifndef MERT_FOREST_RESCORE_H
+#define MERT_FOREST_RESCORE_H
+
+#include <cstring>
+#include <iosfwd>
+#include <valarray>
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+
+#include "BleuScorer.h"
+#include "Hypergraph.h"
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
+
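+// Hash and equality functors so that a WordVec (an ngram) can be used as the key
+// of a boost::unordered_map; both operate on the raw bytes of the vector's elements.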
+struct NgramHash : public std::unary_function<const WordVec&, std::size_t> {
+ std::size_t operator()(const WordVec& ngram) const {
+ return util::MurmurHashNative(&(ngram[0]), ngram.size() * sizeof(WordVec::value_type));
+ }
+};
+
+struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&, bool> {
+ bool operator()(const WordVec& first, const WordVec& second) const {
+ if (first.size() != second.size()) return false;
+ return memcmp(&(first[0]), &(second[0]), first.size() * sizeof(WordVec::value_type)) == 0;
+ }
+};
+
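+// Maps each ngram (WordVec) to the number of times it has been seen.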
+typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
+
+
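+/**
+ * Holds the reference translations for each sentence as ngram counts plus
+ * reference lengths, for computing BLEU statistics against hypotheses.
+**/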
+class ReferenceSet {
+
+public:
+
+ void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
+
+ void Load(const std::vector<std::string>& files, Vocab& vocab);
+
+ size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
+
+ size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+
+private:
+ //ngrams to (clipped,unclipped) counts
+ typedef boost::unordered_map<WordVec, std::pair<std::size_t,std::size_t>, NgramHash,NgramEquals> NgramMap;
+ std::vector<NgramMap> ngramCounts_;
+ std::vector<size_t> lengths_;
+
+};
+
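+/**
+ * BLEU-related state cached at each hypergraph vertex: BLEU sufficient statistics,
+ * left and right target-side context words, and the target length so far.
+**/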
+struct VertexState {
+ VertexState();
+
+ std::vector<FeatureStatsType> bleuStats;
+ WordVec leftContext;
+ WordVec rightContext;
+ size_t targetLength;
+};
+
+/**
+ * Used to score a rule (i.e. an edge) when we are applying it.
+**/
+class HgBleuScorer {
+ public:
+ HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+ references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
+ backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+ vertexStates_.resize(graph.VertexSize());
+ totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+ }
+
+ FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats);
+
+ void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+
+ private:
+ const ReferenceSet& references_;
+ std::vector<VertexState> vertexStates_;
+ size_t sentenceId_;
+ size_t totalSourceLength_;
+ const Graph& graph_;
+ std::vector<FeatureStatsType> backgroundBleu_;
+ FeatureStatsType backgroundRefLength_;
+
+ void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+ size_t GetTargetLength(const Edge& edge) const;
+};
+
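+/**
+ * A complete hypothesis read off the hypergraph: its feature vector, target-side
+ * text and BLEU sufficient statistics.
+**/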
+struct HgHypothesis {
+ SparseVector featureVector;
+ WordVec text;
+ std::vector<FeatureStatsType> bleuStats;
+};
+
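+/**
+ * Viterbi search over the hypergraph, combining the model score (weights) with
+ * bleuWeight times an approximate BLEU computed from the references and the
+ * background BLEU statistics; the best hypothesis is written into *bestHypo.
+**/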
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo);
+
+} // namespace MosesTuning
+
+#endif
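
For orientation, a minimal usage sketch of the interface added in this commit (not part of the patch). It assumes the Graph, Vocab, SparseVector, FeatureStatsType types and the kBleuNgramOrder constant come in via Hypergraph.h/BleuScorer.h, and that the hypergraph, model weights and references have been populated elsewhere; the helper name RescoreOneSentence is hypothetical.

#include <vector>

#include "ForestRescore.h"

using namespace MosesTuning;

// Hypothetical helper, for illustration only: find the BLEU-interpolated best
// hypothesis of one sentence's hypergraph. The ReferenceSet would have been
// filled beforehand via ReferenceSet::Load(files, vocab).
HgHypothesis RescoreOneSentence(const Graph& graph,
                                const SparseVector& weights,
                                const ReferenceSet& references,
                                size_t sentenceId) {
  // One (matched, total) count pair per ngram order, plus the reference length,
  // matching the backgroundBleu[kBleuNgramOrder*2] access in HgBleuScorer. A real
  // caller would typically carry these statistics over from previous sentences.
  std::vector<FeatureStatsType> backgroundBleu(kBleuNgramOrder * 2 + 1, 0);
  HgHypothesis bestHypo;
  Viterbi(graph, weights, /*bleuWeight=*/1.0f, references, sentenceId,
          backgroundBleu, &bestHypo);
  return bestHypo;
}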