Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/mm/ug_bitext_pstats.h')
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext_pstats.h 63
1 files changed, 63 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
new file mode 100644
index 000000000..c5b6c0152
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
@@ -0,0 +1,63 @@
+// -*- c++ -*-
+#pragma once
+
+#include <boost/thread.hpp>
+#include <boost/unordered_map.hpp>
+
+#include "ug_typedefs.h"
+#include "ug_bitext_jstats.h"
+#include "moses/thread_safe_container.h"
+
+namespace Moses
+{
+ namespace bitext
+ {
+ struct // statistics record filled during sampling: counters, orientation histograms, per-target jstats
+ pstats
+ {
+ typedef boost::unordered_map<uint64_t, sptr<pstats> > map_t; // hash map from a 64-bit key to an sptr-held pstats
+ typedef ThreadSafeContainer<uint64_t, sptr<pstats>, map_t> cache_t; // map_t wrapped in ThreadSafeContainer
+ typedef std::vector<uchar> alnvec; // type of the local alignment passed to add()
+#if UG_BITEXT_TRACK_ACTIVE_THREADS
+ static ThreadSafeCounter active; // only compiled in when UG_BITEXT_TRACK_ACTIVE_THREADS is non-zero
+#endif
+ boost::mutex lock; // mutex for parallel gathering of stats
+ boost::condition_variable ready; // lets consumers wait until this record is ready
+
+ size_t raw_cnt; // (approximate) raw occurrence count
+ size_t sample_cnt; // number of instances selected during sampling
+ size_t good; // number of selected instances with valid word alignments
+ size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
+ size_t in_progress; // number of threads currently working on this record (see count_workers())
+
+ uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations; valid indices are 0..LRModel::NONE
+ uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations; valid indices are 0..LRModel::NONE
+
+ std::vector<uint32_t> indoc; // distribution over where samples came from (presumably indexed by docid -- confirm)
+
+ typedef std::map<uint64_t, jstats> trg_map_t; // ordered map from a 64-bit key to a jstats record
+ trg_map_t trg; // presumably keyed by target phrase id (cf. pid in add()) -- confirm
+ pstats(); // declared only; definition is not in this header
+ ~pstats(); // declared only; definition is not in this header
+ void release(); // NOTE(review): presumably the counterpart of register_worker(); body not visible here
+ void register_worker(); // NOTE(review): presumably updates in_progress; body not visible here
+ size_t count_workers() { return in_progress; } // returns in_progress as-is; does not take `lock`
+
+ bool // NOTE(review): meaning of the returned flag is not visible in this header
+ add(uint64_t const pid, // target phrase id
+ float const w, // sample weight (1./(# of phrases extractable))
+ alnvec const& a, // local alignment
+ uint32_t const cnt2, // raw target phrase count
+ uint32_t fwd_o, // fwd. phrase orientation
+ uint32_t bwd_o, // bwd. phrase orientation
+ int const docid); // document where sample was found
+
+ void // declared only; definition is not in this header
+ count_sample(int const docid, // document where sample was found
+ size_t const num_pairs, // # of phrases extractable here
+ int const po_fwd, // fwd phrase orientation
+ int const po_bwd); // bwd phrase orientation
+ };
+
+ }
+}