Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ug_prep_phrases.h « mm « UG « TranslationModel « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 93a5ea82a53ea1272cda46219d8b432f0cb4c171 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; -*-
// Functions for multi-threaded pre-fetching of phrase table entries
// Author: Ulrich Germann

#include <map>

#include "moses/TranslationModel/UG/generic/threading/ug_thread_pool.h"
#include "moses/thread_safe_container.h"
#include "ug_bitext.h"
#include "ug_lru_cache.h"

namespace Moses {
namespace bitext { 

template<typename Token> // , typename BITEXT>
struct StatsCollector
{
  typedef    lru_cache::LRU_Cache< uint64_t, pstats  > hcache_t;
  typedef ThreadSafeContainer<uint64_t, sptr<pstats> > pcache_t;
  typedef                 map<uint64_t, sptr<pstats> > lcache_t;
  iptr<Bitext<Token> const> bitext; // underlying bitext
  sampling_method           method; // sampling method 
  size_t               sample_size; // sample size 
  sptr<SamplingBias const>    bias; // sampling bias
  hcache_t*                 hcache; // "history" cache
  pcache_t*                 pcache; // permanent cache
  size_t                 pcache_th; // threshold for adding items to pcache 
  sptr<lcache_t>            lcache; // local cache
  ug::ThreadPool*            tpool; // thread pool to run jobs on 
  
  StatsCollector(iptr<Bitext<Token> > xbitext, 
		 sptr<SamplingBias> const xbias) 
    : method(ranked_sampling)
    , sample_size(100)
    , bias(xbias)
    , hcache(NULL)
    , pcache(NULL)
    , pcache_th(10000)
    , tpool(NULL)
  { 
    bitext = xbitext;
  }

  void
  process(typename TSA<Token>::tree_iterator& m, 
	  typename TSA<Token>::tree_iterator& r) 
  {
    if (!lcache) lcache.reset(new lcache_t);
    if (m.down())
      {
        do 
          {
            if (!r.extend(m.getToken(-1)->id())) continue;
            this->process(m, r);
            uint64_t pid = r.getPid();
            sptr<pstats> stats;
            if (hcache) stats = hcache->get(pid); 
            if (!stats && pcache)
              {
                sptr<pstats> const* foo = pcache->get(pid);
                if (foo) stats = *foo; 
              }
            if (!stats) // need to sample
              {
                BitextSampler<Token> s(bitext.get(), r, bias, sample_size, method);
                stats = s.stats();
                if (hcache) hcache->set(pid,stats);
                if (pcache && r.ca() >= pcache_th) pcache->set(pid,stats);
                if (tpool) tpool->add(s);
                else s();
              }
            (*lcache)[pid] = stats;
            r.up();
          }
        while (m.over());
        m.up();
      }
  }
};
} // end of namespace bitext
} // end of namespace Moses

#if 0
#endif
	    // r.up();