Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/nplm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2013-10-29 22:00:37 +0400
committerKenneth Heafield <github@kheafield.com>2013-10-29 22:00:37 +0400
commit78eecfdd7ef4cc0aef575c828c6fef747c63da19 (patch)
treecbd1e84c871306a35e1352286f7749ccac4f60bc /src/multinomial.h
parente4138ba17732e70bfe9ad8e806173c083a9ddd0e (diff)
Copy nplm-0.1 after removing some executable bits
Diffstat (limited to 'src/multinomial.h')
-rw-r--r--src/multinomial.h135
1 files changed, 135 insertions, 0 deletions
diff --git a/src/multinomial.h b/src/multinomial.h
new file mode 100644
index 0000000..1314fcb
--- /dev/null
+++ b/src/multinomial.h
@@ -0,0 +1,135 @@
+#ifndef MULTINOMIAL_H
+#define MULTINOMIAL_H
+
+#include <vector>
+#include <set>
+#include <cassert>
+#include <cmath>
+
+#include <boost/random/uniform_int_distribution.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+
+namespace nplm
+{
+
+template <typename Count>
+class multinomial {
+ std::vector<int> J;
+ std::vector<double> q;
+ boost::random::uniform_int_distribution<Count> unif_int;
+ boost::random::uniform_real_distribution<> unif_real;
+ std::vector<double> m_prob, m_logprob;
+
+public:
+ multinomial() : unif_real(0.0, 1.0) { }
+ multinomial(const std::vector<Count> &counts) : unif_real(0.0, 1.0) { estimate(counts); }
+
+ void estimate(const std::vector<Count>& counts)
+ {
+ int k = counts.size();
+ Count n = 0;
+ m_prob.clear();
+ m_prob.resize(k, 0.0);
+ m_logprob.clear();
+ m_logprob.resize(k, 0.0);
+ for (int i=0; i<k; i++)
+ n += counts[i];
+ for (int i=0; i<k; i++)
+ {
+ m_prob[i] = static_cast<double>(counts[i]) / n;
+ m_logprob[i] = std::log(m_prob[i]);
+ }
+ setup(m_prob);
+ }
+
+ double prob(int i) const { return m_prob[i]; }
+ double logprob(int i) const { return m_logprob[i]; }
+
+ template <typename Engine>
+ int sample(Engine &eng) const
+ {
+ int m = unif_int(eng);
+ double p = unif_real(eng);
+ int s;
+ if (q[m] > p)
+ s = m;
+ else
+ s = J[m];
+ assert (s >= 0);
+ return s;
+ }
+
+private:
+ void setup(const std::vector<double>& probs)
+ {
+ int k = probs.size();
+
+ unif_int = boost::random::uniform_int_distribution<Count>(0, k-1);
+ J.resize(k, -1);
+ q.resize(k, 0);
+
+ // "small" outcomes (prob < 1/k)
+ std::set<int> S;
+ std::set<int>::iterator s_it;
+ // "large" outcomes (prob >= 1/k)
+ std::set<int> L;
+ std::set<int>::iterator l_it;
+ const double tol = 1e-3;
+
+ for (int i=0; i<k; i++)
+ {
+ q[i] = k*probs[i];
+ if (q[i] < 1.0)
+ {
+ S.insert(i);
+ }
+ else
+ {
+ L.insert(i);
+ }
+ }
+
+ while (S.size() > 0 && L.size() > 0)
+ {
+ // choose an arbitrary element s from S and l from L
+ s_it = S.begin();
+ int s = *s_it;
+ l_it = L.begin();
+ int l = *l_it;
+
+ // pair up s and (part of) l as its alias
+ J[s] = l;
+ S.erase(s_it);
+ //q[l] = q[l] - (1.0 - q[s]);
+ q[l] = q[l] + q[s] - 1.0; // more stable?
+
+ // move l from L to S if necessary
+ if (q[l] < 1.0)
+ {
+ S.insert(l);
+ L.erase(l_it);
+ }
+ }
+
+ // any remaining elements must have q/n close to 1, so we leave them alone
+ for (s_it = S.begin(); s_it != S.end(); ++s_it) {
+ //assert (fabs(q[*s_it] - 1) < tol);
+ if (std::fabs(q[*s_it] - 1) > tol)
+ {
+ std::cerr << "warning: multinomial: probability differs from one by " << std::fabs(q[*s_it]-1) << std::endl;
+ }
+ q[*s_it] = 1.0;
+ }
+ for (l_it = L.begin(); l_it != L.end(); ++l_it) {
+ if (std::fabs(q[*l_it] - 1) > tol)
+ {
+ std::cerr << "warning: multinomial: probability differs from one by " << std::fabs(q[*l_it]-1) << std::endl;
+ }
+ }
+ }
+
+};
+
+} // namespace nplm
+
+#endif