// moses/TranslationModel/UG/mm/ug_bitext_moses.h

// -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; cc-style: moses-cc-style -*-
#pragma once
#ifndef NO_MOSES
namespace sapt {

// Return the pstats instance for /phrase/ once its sampling job is done.
// The job is obtained through prep2(); a NULL result is reported via
// UTIL_THROW_IF2.
//  - If more than one worker is configured (m_num_workers > 1), block on
//    ret->ready until ret->in_progress is no longer set.
//  - Otherwise construct an agenda::worker on *ag and invoke it right
//    here, synchronously, while holding m_lock exclusively.
template<typename Token>
SPTR<pstats>
Bitext<Token>::
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
  SPTR<pstats> ret = prep2(ttask, phrase, max_sample);
  UTIL_THROW_IF2(!ret, "Got NULL pointer where I expected a valid pointer.");

  if (m_num_workers > 1)
    {
      // Wait to be notified that this particular job has finished.
      boost::unique_lock<boost::mutex> job_lock(ret->lock);
      while (ret->in_progress)
        ret->ready.wait(job_lock);
    }
  else
    {
      // NOTE(review): an earlier comment at this spot asked why the lock
      // is taken here; that question is still open.
      boost::unique_lock<boost::shared_mutex> exclusive(m_lock);
      typename agenda::worker(*this->ag)();
    }
  return ret;
}


// Convenience wrapper: forward /phrase/ to prep2() with
// m_default_sample_size as the sample size. The handle that prep2()
// returns is deliberately discarded.
template<typename Token>
void
Bitext<Token>::
prep(ttasksptr const& ttask, iter const& phrase) const
{
  static_cast<void>(this->prep2(ttask, phrase, m_default_sample_size));
}


// prep2 puts a sampling job for /phrase/ on the agenda and hands back the
// corresponding pstats instance right away, without waiting for the
// sampling to complete. The member function lookup is the one that waits
// until sampling has finished. This split allows sampling to run in the
// background.
//
// A negative /max_sample/ is replaced by m_default_sample_size.
// Throws (via UTIL_THROW_IF2) if the agenda returns a NULL job.
template<typename Token>
SPTR<pstats>
Bitext<Token>
::prep2
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
  if (max_sample < 0) max_sample = m_default_sample_size;

  // The sampling bias, if there is one, comes from the ContextForQuery
  // registered for this bitext in the task's scope.
  SPTR<SamplingBias> bias;
  SPTR<Moses::ContextScope> scope = ttask->GetScope();
  SPTR<ContextForQuery> context = scope->get<ContextForQuery>(this);
  if (context) bias = context->bias;

  // Cache selection:
  // - rare phrases and requests with a non-default sample size are not
  //   cached (a good caching threshold has yet to be determined)
  // - with a sampling bias, the caches of the query context are used
  //   instead of the bitext-wide ones
  SPTR<pstats::cache_t> cache;
  bool const cacheable
    = (max_sample == int(m_default_sample_size)
       && phrase.approxOccurrenceCount() > m_pstats_cache_threshold);
  if (cacheable)
    {
      bool const rooted_at_I1 = (phrase.root == I1.get());
      if (bias)
        cache = rooted_at_I1 ? context->cache1 : context->cache2;
      else
        cache = rooted_at_I1 ? m_cache1 : m_cache2;
    }

  SPTR<pstats> ret;
  if (cache)
    {
      // ret is still empty at this point; it is handed to get() as its
      // second argument (presumably a default value -- confirm against
      // pstats::cache_t). Only a non-empty cached entry counts as a hit.
      SPTR<pstats> const* cached = cache->get(phrase.getPid(), ret);
      if (cached && *cached)
        return *cached;
    }

  // Agenda creation and job submission happen under an exclusive m_lock.
  boost::unique_lock<boost::shared_mutex> agenda_lock(m_lock);
  if (!ag)
    {
      // First request: create the agenda, and start workers only if more
      // than one is configured.
      ag.reset(new agenda(*this));
      if (m_num_workers > 1)
        ag->add_workers(m_num_workers);
    }
  ret = ag->add_job(this, phrase, max_sample, bias);
  if (cache)
    cache->set(phrase.getPid(), ret);
  UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
  return ret;
}


}
#endif