Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2014-07-09 05:41:28 +0400
committerUlrich Germann <ugermann@inf.ed.ac.uk>2014-07-09 05:41:28 +0400
commit4d41211c2cd6eb75c5a229c10e98fdfa1acff3b4 (patch)
tree142dca24625b301147a931f728825dd40cf74e53 /moses/TranslationModel/UG/sapt_pscore_logcnt.h
parent28d64e23396cba53a83b75e01e9977db636e2285 (diff)
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes.
Diffstat (limited to 'moses/TranslationModel/UG/sapt_pscore_logcnt.h')
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_logcnt.h65
1 files changed, 65 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
new file mode 100644
index 000000000..2790323ed
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@@ -0,0 +1,65 @@
+// -*- c++ -*-
+// Phrase scorer that reports the natural logarithm of phrase (pair)
+// occurrence counts in a bitext. The specification string selects which
+// counts are emitted: r1, s1, g1, j, r2 (features log-r1 ... log-r2).
+// written by Ulrich Germann
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+ namespace bitext {
+
+ // Phrase scorer that emits log(count) features for a phrase pair.
+ //
+ // The specification string passed to the constructor selects the counts
+ // to report. Each of the following substrings, if present in the spec,
+ // enables one feature; features are always laid out in this order:
+ //   "r1" -> log-r1  (log of pp.raw1,    raw source phrase count)
+ //   "s1" -> log-s1  (log of pp.sample1, L1 sample size)
+ //   "g1" -> log-g1  (log of pp.good1,   coherent phrases)
+ //   "j"  -> log-j   (log of pp.joint,   joint count)
+ //   "r2" -> log-r2  (log of pp.raw2,    raw target phrase count)
+ template<typename Token>
+ class
+ PScoreLogCnt : public PhraseScorer<Token>
+ {
+   string m_specs; // spec string as given; re-inspected on every scoring call
+ public:
+   // Registers one feature name per count selected in /specs/ and records
+   // the resulting number of features. Matching is by plain substring
+   // search, so e.g. any spec containing the letter 'j' enables log-j.
+   PScoreLogCnt(string const& specs)
+   {
+     // -1 is a placeholder start index; presumably the owner assigns the
+     // real offset before scoring -- TODO confirm against PhraseScorer.
+     this->m_index = -1;
+     this->m_specs = specs;
+     if (specs.find("r1") != string::npos) // raw source phrase counts
+       this->m_feature_names.push_back("log-r1");
+     if (specs.find("s1") != string::npos) // L1 sample size
+       this->m_feature_names.push_back("log-s1");
+     if (specs.find("g1") != string::npos) // coherent phrases
+       this->m_feature_names.push_back("log-g1");
+     if (specs.find("j") != string::npos) // joint counts
+       this->m_feature_names.push_back("log-j");
+     if (specs.find("r2") != string::npos) // raw target phrase counts
+       this->m_feature_names.push_back("log-r2");
+     this->m_num_feats = this->m_feature_names.size();
+   }
+
+   // Always reports true, regardless of the feature index /i/.
+   // NOTE(review): the emitted values are logarithms of counts, not
+   // integers; confirm that callers expect 'true' here.
+   bool
+   isIntegerValued(int i) const { return true; }
+
+   // Writes the selected log-count features for /pp/ into consecutive
+   // slots of /dest/, starting at this->m_index. If /dest/ is NULL the
+   // values go into pp.fvals. /bt/ is not used by this scorer.
+   // All five counts are asserted to be non-zero, whether or not the
+   // corresponding feature is selected.
+   void
+   operator()(Bitext<Token> const& bt,
+              PhrasePair<Token>& pp,
+              vector<float> * dest = NULL) const
+   {
+     if (!dest) dest = &pp.fvals;
+     assert(pp.raw1);
+     assert(pp.sample1);
+     assert(pp.good1);
+     assert(pp.joint);
+     assert(pp.raw2);
+     size_t i = this->m_index;
+     // The order below must mirror the order in which the constructor
+     // registered the feature names.
+     if (m_specs.find("r1") != string::npos)
+       (*dest)[i++] = log(pp.raw1);
+     if (m_specs.find("s1") != string::npos)
+       (*dest)[i++] = log(pp.sample1);
+     if (m_specs.find("g1") != string::npos)
+       (*dest)[i++] = log(pp.good1);
+     if (m_specs.find("j") != string::npos)
+       (*dest)[i++] = log(pp.joint);
+     if (m_specs.find("r2") != string::npos)
+       // Post-increment, as for the other features: a pre-increment here
+       // would skip this feature's slot and write one element too far.
+       (*dest)[i++] = log(pp.raw2);
+   }
+ };
+ } // namespace bitext
+} // namespace Moses