Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/sapt_pscore_base.h')
-rw-r--r-- moses/TranslationModel/UG/sapt_pscore_base.h 103
1 files changed, 103 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h
new file mode 100644
index 000000000..68a491145
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@@ -0,0 +1,103 @@
+// -*- c++ -*-
+// Base classes for suffix array-based phrase scorers
+// written by Ulrich Germann
+#pragma once
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+ namespace bitext
+ {
+
+ // Abstract base class that defines the common API for phrase scorers.
+ //
+ // A scorer exposes fcnt() features whose names are available through
+ // fnames()/fname(). This class only stores the bookkeeping members;
+ // filling in m_tag, m_num_feats and m_feature_names is left to the
+ // derived classes, as is the implementation of operator().
+ template<typename Token>
+ class
+ PhraseScorer
+ {
+ protected:
+   int m_index;       // value reported by getIndex(); -1 until setIndex()
+   int m_num_feats;   // value reported by fcnt()
+   std::string m_tag; // tag identifying the scorer (unused in this class)
+   std::vector<std::string> m_feature_names; // returned by fnames()/fname()
+
+ public:
+
+   // Start from a well-defined state so that getIndex() and fcnt()
+   // never read indeterminate values before a derived class has set
+   // the members.
+   PhraseScorer() : m_index(-1), m_num_feats(0) {}
+
+   // The class has virtual members, so it needs a virtual destructor
+   // to make deletion through a PhraseScorer pointer well-defined.
+   virtual
+   ~PhraseScorer() {}
+
+   // Apply the scorer to phrase pair /pp/ in the context of bitext /pt/.
+   // The actual scoring is defined by the derived class.
+   // NOTE(review): /dest/ presumably receives the feature values when
+   // it is non-NULL -- confirm against the concrete scorers.
+   virtual
+   void
+   operator()(Bitext<Token> const& pt,
+              PhrasePair<Token>& pp,
+              std::vector<float> * dest=NULL)
+     const = 0;
+
+   void
+   setIndex(int const i) { m_index = i; }
+
+   int
+   getIndex() const { return m_index; }
+
+   // number of features provided by this scorer
+   int
+   fcnt() const { return m_num_feats; }
+
+   // names of all features provided by this scorer
+   std::vector<std::string> const &
+   fnames() const { return m_feature_names; }
+
+   // Name of the i-th feature. A negative /i/ counts from the end
+   // (-1 is the last feature). Raises an error via UTIL_THROW_IF2 if
+   // the resulting index is outside [0, fcnt()).
+   std::string const &
+   fname(int i) const
+   {
+     if (i < 0) i += m_num_feats;
+     UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
+                    "Feature name index out of range at " << HERE);
+     return m_feature_names.at(i);
+   }
+
+   // is this feature log valued? (default: yes, for every feature)
+   virtual
+   bool
+   isLogVal(int i) const { return true; }
+
+   // is this feature integer valued (e.g., count features)?
+   // (default: no, for every feature)
+   virtual
+   bool
+   isIntegerValued(int i) const { return false; }
+
+   // does this feature function allow pooling of counts if
+   // there are no occurrences in the respective corpus?
+   // (default: yes)
+   virtual
+   bool
+   allowPooling() const { return true; }
+
+ };
+
+ // Base class for 'families' of phrase scorers that have a single
+ // real-valued parameter: init() creates one feature for every
+ // parameter value found in its specification string.
+ template<typename Token>
+ class
+ SingleRealValuedParameterPhraseScorerFamily
+   : public PhraseScorer<Token>
+ {
+ protected:
+   std::vector<float> m_x; // parameter values, one per feature
+
+   // Parse /specs/ as a list of numbers, each followed by a single
+   // (non-whitespace) separator character, e.g. "0.5,1,2". For every
+   // number x this appends x to m_x and adds a feature named
+   // "<m_tag>-<x with two decimals>"; values that agree to two decimals
+   // therefore produce identical feature names. Parsing stops at the
+   // first token that is not a number.
+   //
+   // Raises an error via UTIL_THROW_IF2 if m_tag is empty, if /specs/
+   // is empty or contains no leading number, or if feature names have
+   // already been set up (init() may only run once).
+   virtual
+   void
+   init(std::string const specs)
+   {
+     UTIL_THROW_IF2(this->m_tag.size() == 0,
+                    "m_tag must be initialized in constructor");
+     UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
+     UTIL_THROW_IF2(this->m_feature_names.size(),
+                    "PhraseScorer can only be initialized once!");
+     this->m_index = -1;
+     float x; char c;
+     // read a number, then consume exactly one separator character
+     for (std::istringstream buf(specs); buf>>x; buf>>c)
+       {
+         this->m_x.push_back(x);
+         std::string fname
+           = (boost::format("%s-%.2f") % this->m_tag % x).str();
+         this->m_feature_names.push_back(fname);
+       }
+     // A non-empty spec without any number would otherwise silently
+     // yield a scorer with zero features.
+     UTIL_THROW_IF2(this->m_x.empty(),
+                    "no parameter values found in specification string '"
+                    << specs << "'");
+     this->m_num_feats = static_cast<int>(this->m_x.size());
+   }
+ };
+ } // namespace bitext
+} // namespace Moses