Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2015-07-05 15:08:57 +0300
committerUlrich Germann <ugermann@inf.ed.ac.uk>2015-07-05 15:08:57 +0300
commit4dd2ea3117bdc156cde45d9d126e07a245c6381d (patch)
tree82142945e1ba423ff6eb1165485bc9ddcc5d223a
parente1f31666c3c9b37ff299c7d12e3be1c1cd151f07 (diff)
Added random sampling to BitextSampler.ranked-sampling-v0.1.0
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_sampler.h557
-rw-r--r--moses/TranslationModel/UG/mmsapt.cpp34
2 files changed, 356 insertions, 235 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h
index 9333dd879..d3845415d 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h
@@ -1,8 +1,14 @@
// -*- mode: c++; tab-width: 2; indent-tabs-mode: nil -*-
#pragma once
+
+#include <algorithm>
+
+#include <boost/random.hpp>
#include <boost/thread.hpp>
#include <boost/thread/locks.hpp>
#include <boost/intrusive_ptr.hpp>
+#include <boost/math/distributions/binomial.hpp>
+
#include "ug_bitext.h"
#include "ug_bitext_pstats.h"
#include "ug_sampling_bias.h"
@@ -16,246 +22,357 @@ namespace Moses
namespace bitext
{
- enum sampling_method { full_coverage, random_sampling, ranked_sampling };
+enum sampling_method { full_coverage, random_sampling, ranked_sampling };
- typedef ugdiss::ttrack::Position TokenPosition;
- class CandidateSorter
- {
- SamplingBias const& score;
- public:
- CandidateSorter(SamplingBias const& s) : score(s) {}
- bool operator()(TokenPosition const& a, TokenPosition const& b) const
- { return score[a.sid] > score[b.sid]; }
- };
+typedef ugdiss::ttrack::Position TokenPosition;
+class CandidateSorter
+{
+ SamplingBias const& score;
+public:
+ CandidateSorter(SamplingBias const& s) : score(s) {}
+ bool operator()(TokenPosition const& a, TokenPosition const& b) const
+ { return score[a.sid] > score[b.sid]; }
+};
- template<typename Token>
- class
- BitextSampler : public reference_counter
- {
- typedef Bitext<Token> bitext;
- typedef TSA<Token> tsa;
- typedef SamplingBias bias;
- typedef typename Bitext<Token>::iter tsa_iter;
- mutable boost::condition_variable m_ready;
- mutable boost::mutex m_lock;
- // const members
- // sptr<bitext const> const m_bitext; // keep bitext alive while I am
- // should be an
- iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
- size_t const m_plen; // length of lookup phrase
- bool const m_fwd; // forward or backward direction?
- sptr<tsa const> const m_root; // root of suffix array
- char const* m_next; // current position
- char const* m_stop; // end of search range
- sampling_method const m_method; /* look at all / random sample /
- * ranked samples */
- sptr<bias const> const m_bias; // bias over candidates
- size_t const m_samples; // how many samples at most
- // non-const members
- sptr<pstats> m_stats; // destination for phrase stats
- size_t m_ctr; // number of samples considered
- float m_total_bias; // for random sampling with bias
- bool m_finished;
- bool consider_sample(TokenPosition const& p);
- size_t perform_ranked_sampling();
+template<typename Token>
+class
+BitextSampler : public reference_counter
+{
+ typedef Bitext<Token> bitext;
+ typedef TSA<Token> tsa;
+ typedef SamplingBias bias;
+ typedef typename Bitext<Token>::iter tsa_iter;
+ mutable boost::condition_variable m_ready;
+ mutable boost::mutex m_lock;
+ // const members
+ // sptr<bitext const> const m_bitext; // keep bitext alive while I am
+ // should be an
+ iptr<bitext const> const m_bitext; // keep bitext alive as long as I am
+ size_t const m_plen; // length of lookup phrase
+ bool const m_fwd; // forward or backward direction?
+ sptr<tsa const> const m_root; // root of suffix array
+ char const* m_next; // current position
+ char const* m_stop; // end of search range
+ sampling_method const m_method; // look at all/random/ranked samples
+ sptr<bias const> const m_bias; // bias over candidates
+ size_t const m_samples; // how many samples at most
+ // non-const members
+ sptr<pstats> m_stats; // destination for phrase stats
+ size_t m_ctr; // number of samples considered
+ float m_total_bias; // for random sampling with bias
+ bool m_finished;
+
+ boost::taus88 m_rnd; // every job has its own pseudo random generator
+ double m_rnddenom; // denominator for scaling random sampling
+ double m_bias_total;
+
+ bool consider_sample(TokenPosition const& p);
+ size_t perform_ranked_sampling();
+ size_t perform_random_sampling();
+
+ int check_sample_distribution(uint64_t const& sid, uint64_t const& offset);
+ bool flip_coin(ugdiss::id_type & sid, ushort & offset);
- public:
- BitextSampler(BitextSampler const& other);
- BitextSampler const& operator=(BitextSampler const& other);
- BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase,
- sptr<SamplingBias const> const& bias, size_t const max_samples,
- sampling_method const method);
- ~BitextSampler();
- bool operator()(); // run sampling
- sptr<pstats> stats();
- bool done() const;
- };
-
- template<typename Token>
- BitextSampler<Token>::
- BitextSampler(Bitext<Token> const* const bitext,
- typename bitext::iter const& phrase,
- sptr<SamplingBias const> const& bias, size_t const max_samples,
- sampling_method const method)
- : m_bitext(bitext)
- , m_plen(phrase.size())
- , m_fwd(phrase.root == bitext->I1.get())
- , m_root(m_fwd ? bitext->I1 : bitext->I2)
- , m_next(phrase.lower_bound(-1))
- , m_stop(phrase.upper_bound(-1))
- , m_method(method)
- , m_bias(bias)
- , m_samples(max_samples)
- , m_ctr(0)
- , m_total_bias(0)
- , m_finished(false)
- {
- m_stats.reset(new pstats);
- m_stats->raw_cnt = phrase.ca();
- m_stats->register_worker();
- // cerr << phrase.str(bitext->V1.get()) << " [" << HERE << "]" << endl;
- }
-
- template<typename Token>
- BitextSampler<Token>::
- BitextSampler(BitextSampler const& other)
- : m_bitext(other.m_bitext)
- , m_plen(other.m_plen)
- , m_fwd(other.m_fwd)
- , m_root(other.m_root)
- , m_next(other.m_next)
- , m_stop(other.m_stop)
- , m_method(other.m_method)
- , m_bias(other.m_bias)
- , m_samples(other.m_samples)
- {
- // lock both instances
- boost::unique_lock<boost::mutex> mylock(m_lock);
- boost::unique_lock<boost::mutex> yrlock(other.m_lock);
- // actually, BitextSamplers should only copied on job submission
- m_stats = other.m_stats;
- m_stats->register_worker();
- m_ctr = other.m_ctr;
- m_total_bias = other.m_total_bias;
- m_finished = other.m_finished;
- }
+public:
+ BitextSampler(BitextSampler const& other);
+ BitextSampler const& operator=(BitextSampler const& other);
+ BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase,
+ sptr<SamplingBias const> const& bias, size_t const max_samples,
+ sampling_method const method);
+ ~BitextSampler();
+ bool operator()(); // run sampling
+ sptr<pstats> stats();
+ bool done() const;
+};
- // Ranked sampling sorts all samples by score and then considers the top-ranked
- // candidates for phrase extraction.
- template<typename Token>
- size_t
- BitextSampler<Token>::
- perform_ranked_sampling()
- {
- if (m_next == m_stop) return m_ctr;
- CandidateSorter sorter(*m_bias);
- // below: nbest size = 20 * m_samples to allow for failed phrase extraction
- NBestList<TokenPosition, CandidateSorter> nbest(20*m_samples,sorter);
- ugdiss::tsa::ArrayEntry I(m_next);
- while (I.next < m_stop)
- {
- ++m_ctr;
- nbest.add(m_root->readEntry(I.next,I));
- }
- size_t n = 0;
- for (size_t i = 0; n < m_samples && i < nbest.size(); ++i)
- if (consider_sample(nbest[i])) ++n;;
- // cerr << m_ctr << " samples considered at "
- // << __FILE__ << ":" << __LINE__ << endl;
- return m_ctr;
- }
-
- template<typename Token>
- bool
- BitextSampler<Token>::
- consider_sample(TokenPosition const& p)
- {
- std::vector<unsigned char> aln;
- bitvector full_aln(100*100);
- PhraseExtractionRecord rec(p.sid, p.offset, p.offset + m_plen,
- !m_fwd, &aln, &full_aln);
- int docid = m_bias ? m_bias->GetClass(p.sid) : -1;
+template<typename Token>
+int
+BitextSampler<Token>::
+check_sample_distribution(uint64_t const& sid, uint64_t const& offset)
+{ // ensure that the sampled distribution approximately matches the bias
+ // @return 0: SKIP this occurrence
+ // @return 1: consider this occurrence for sampling
+ // @return 2: include this occurrence in the sample by all means
+
+ typedef boost::math::binomial_distribution<> binomial;
+
+ // std::ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
+ std::ostream* log = NULL;
+
+ if (!m_bias) return 1;
- bool good = m_bitext->find_trg_phr_bounds(rec);
+ float p = (*m_bias)[sid];
+ id_type docid = m_bias->GetClass(sid);
+
+ std::map<uint32_t,uint32_t>::const_iterator m = m_stats->indoc.find(docid);
+ uint32_t k = m != m_stats->indoc.end() ? m->second : 0 ;
+
+ // always consider candidates from dominating documents and
+ // from documents that have not been considered at all yet
+ bool ret = (p > .5 || k == 0);
+
+ if (ret && !log) return 1;
+
+ uint32_t N = m_stats->good; // number of trials
+ float d = cdf(complement(binomial(N, p), k));
+ // d: probability that samples contains k or more instances from doc #docid
+ ret = ret || d >= .05;
#if 0
- cerr << p.sid << " " << docid << " "
- << (good ? "OK " : "bad ")
- << __FILE__ << ":" << __LINE__ << endl;
+ if (log)
+ {
+ Token const* t = m_root->getCorpus()->sntStart(sid)+offset;
+ Token const* x = t - min(offset,uint64_t(3));
+ Token const* e = t + 4;
+ if (e > m_root->getCorpus()->sntEnd(sid))
+ e = m_root->getCorpus()->sntEnd(sid);
+ *log << docid << ":" << sid << " " << size_t(k) << "/" << N
+ << " @" << p << " => " << d << " [";
+ std::map<uint32_t, uint32_t>::const_iterator m;
+ for (m = m_stats->indoc.begin(); m != m_stats->indoc.end(); ++m)
+ {
+ if (m != m_stats->indoc.begin()) *log << " ";
+ *log << m->first << ":" << m->second;
+ }
+ *log << "] ";
+ for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
+ if (!ret) *log << "SKIP";
+ else if (p < .5 && d > .9) *log << "FORCE";
+ *log << std::endl;
+ }
#endif
- if (!good)
- { // no good, probably because phrase is not coherent
- m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd);
- return false;
- }
+ return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
+}
+
+template<typename Token>
+bool
+BitextSampler<Token>::
+flip_coin(ugdiss::id_type & sid, ushort & offset)
+{
+ int no_maybe_yes = m_bias ? check_sample_distribution(sid, offset) : 1;
+ if (no_maybe_yes == 0) return false; // no
+ if (no_maybe_yes > 1) return true; // yes
+ // ... maybe: flip a coin
+ size_t options_chosen = m_stats->good;
+ size_t options_total = max(m_stats->raw_cnt, m_ctr);
+ size_t options_left = (options_total - m_ctr);
+ size_t random_number = options_left * (m_rnd()/(m_rnd.max()+1.));
+ size_t threshold;
+ if (m_bias_total) // we have a bias and there are candidates with non-zero prob
+ threshold = ((*m_bias)[sid]/m_bias_total * options_total * m_samples);
+ else // no bias, or all have prob 0 (can happen with a very opinionated bias)
+ threshold = m_samples;
+ return random_number + options_chosen < threshold;
+}
+
+
+
+
+template<typename Token>
+BitextSampler<Token>::
+BitextSampler(Bitext<Token> const* const bitext,
+ typename bitext::iter const& phrase,
+ sptr<SamplingBias const> const& bias, size_t const max_samples,
+ sampling_method const method)
+ : m_bitext(bitext)
+ , m_plen(phrase.size())
+ , m_fwd(phrase.root == bitext->I1.get())
+ , m_root(m_fwd ? bitext->I1 : bitext->I2)
+ , m_next(phrase.lower_bound(-1))
+ , m_stop(phrase.upper_bound(-1))
+ , m_method(method)
+ , m_bias(bias)
+ , m_samples(max_samples)
+ , m_ctr(0)
+ , m_total_bias(0)
+ , m_finished(false)
+{
+ m_stats.reset(new pstats);
+ m_stats->raw_cnt = phrase.ca();
+ m_stats->register_worker();
+ // cerr << phrase.str(bitext->V1.get()) << " [" << HERE << "]" << endl;
+}
+
+template<typename Token>
+BitextSampler<Token>::
+BitextSampler(BitextSampler const& other)
+ : m_bitext(other.m_bitext)
+ , m_plen(other.m_plen)
+ , m_fwd(other.m_fwd)
+ , m_root(other.m_root)
+ , m_next(other.m_next)
+ , m_stop(other.m_stop)
+ , m_method(other.m_method)
+ , m_bias(other.m_bias)
+ , m_samples(other.m_samples)
+{
+ // lock both instances
+ boost::unique_lock<boost::mutex> mylock(m_lock);
+ boost::unique_lock<boost::mutex> yrlock(other.m_lock);
+ // actually, BitextSamplers should only copied on job submission
+ m_stats = other.m_stats;
+ m_stats->register_worker();
+ m_ctr = other.m_ctr;
+ m_total_bias = other.m_total_bias;
+ m_finished = other.m_finished;
+}
+
+// Ranked sampling sorts all samples by score and then considers the top-ranked
+// candidates for phrase extraction.
+template<typename Token>
+size_t
+BitextSampler<Token>::
+perform_ranked_sampling()
+{
+ if (m_next == m_stop) return m_ctr;
+ CandidateSorter sorter(*m_bias);
+ // below: nbest size = 4 * m_samples to allow for failed phrase extraction
+ NBestList<TokenPosition, CandidateSorter> nbest(4*m_samples,sorter);
+ ugdiss::tsa::ArrayEntry I(m_next);
+ while (I.next < m_stop)
+ {
+ ++m_ctr;
+ nbest.add(m_root->readEntry(I.next,I));
+ }
+ for (size_t i = 0; m_stats->good < m_samples && i < nbest.size(); ++i)
+ consider_sample(nbest[i]);
+ return m_ctr;
+}
+
+// Ranked sampling sorts all samples by score and then considers the top-ranked
+// candidates for phrase extraction.
+template<typename Token>
+size_t
+BitextSampler<Token>::
+perform_random_sampling()
+{
+ if (m_next == m_stop) return m_ctr;
+ m_bias_total = 0;
+ if (m_bias)
+ {
+ m_stats->raw_cnt = 0;
+ for (ugdiss::tsa::ArrayEntry I(m_next); I.next < m_stop;)
+ {
+ m_root->readEntry(I.next,I);
+ ++m_stats->raw_cnt;
+ m_bias_total += (*m_bias)[I.sid];
+ }
+ }
+
+ ugdiss::tsa::ArrayEntry I(m_next);
+ while (m_stats->good < m_samples && I.next < m_stop)
+ {
+ ++m_ctr;
+ m_root->readEntry(I.next,I);
+ if (!flip_coin(I.sid, I.offset)) continue;
+ consider_sample(I);
+ }
+ return m_ctr;
+}
+
+template<typename Token>
+bool
+BitextSampler<Token>::
+consider_sample(TokenPosition const& p)
+{
+ std::vector<unsigned char> aln;
+ bitvector full_aln(100*100);
+ PhraseExtractionRecord
+ rec(p.sid, p.offset, p.offset + m_plen, !m_fwd, &aln, &full_aln);
+ int docid = m_bias ? m_bias->GetClass(p.sid) : -1;
+ if (!m_bitext->find_trg_phr_bounds(rec))
+ { // no good, probably because phrase is not coherent
+ m_stats->count_sample(docid, 0, rec.po_fwd, rec.po_bwd);
+ return false;
+ }
- // all good: register this sample as valid
- size_t num_pairs = (rec.s2 - rec.s1 + 1) * (rec.e2 - rec.e1 + 1);
- m_stats->count_sample(docid, num_pairs, rec.po_fwd, rec.po_bwd);
+ // all good: register this sample as valid
+ size_t num_pairs = (rec.s2 - rec.s1 + 1) * (rec.e2 - rec.e1 + 1);
+ m_stats->count_sample(docid, num_pairs, rec.po_fwd, rec.po_bwd);
- float sample_weight = 1./num_pairs;
- Token const* o = (m_fwd ? m_bitext->T2 : m_bitext->T1)->sntStart(rec.sid);
+ float sample_weight = 1./num_pairs;
+ Token const* o = (m_fwd ? m_bitext->T2 : m_bitext->T1)->sntStart(rec.sid);
- // adjust offsets in phrase-internal aligment
- for (size_t k = 1; k < aln.size(); k += 2) aln[k] += rec.s2 - rec.s1;
+ // adjust offsets in phrase-internal aligment
+ for (size_t k = 1; k < aln.size(); k += 2)
+ aln[k] += rec.s2 - rec.s1;
- vector<uint64_t> seen; seen.reserve(10);
- // It is possible that the phrase extraction extracts the same
- // phrase twice, e.g., when word a co-occurs with sequence b b b
- // but is aligned only to the middle word. We can only count
- // each phrase pair once per source phrase occurrence, or else
- // run the risk of having more joint counts than marginal
- // counts.
+ vector<uint64_t> seen; seen.reserve(10);
+ // It is possible that the phrase extraction extracts the same
+ // phrase twice, e.g., when word a co-occurs with sequence b b b but
+ // is aligned only to the middle word. We can only count each phrase
+ // pair once per source phrase occurrence, or else run the risk of
+ // having more joint counts than marginal counts.
- for (size_t s = rec.s1; s <= rec.s2; ++s)
- {
- TSA<Token> const& I = m_fwd ? *m_bitext->I2 : *m_bitext->I1;
- sptr<tsa_iter> b = I.find(o + s, rec.e1 - s);
- UTIL_THROW_IF2(!b || b->size() < rec.e1 - s, "target phrase not found");
+ for (size_t s = rec.s1; s <= rec.s2; ++s)
+ {
+ TSA<Token> const& I = m_fwd ? *m_bitext->I2 : *m_bitext->I1;
+ sptr<tsa_iter> b = I.find(o + s, rec.e1 - s);
+ UTIL_THROW_IF2(!b || b->size() < rec.e1 - s, "target phrase not found");
- for (size_t i = rec.e1; i <= rec.e2; ++i)
- {
- uint64_t tpid = b->getPid();
-
- // poor man's protection against over-counting
- size_t s = 0;
- while (s < seen.size() && seen[s] != tpid) ++s;
- if (s < seen.size()) continue;
- seen.push_back(tpid);
-
- size_t raw2 = b->approxOccurrenceCount();
- m_stats->add(tpid, sample_weight, m_bias ? (*m_bias)[p.sid] : 1,
- aln, raw2, rec.po_fwd, rec.po_bwd, docid);
- bool ok = (i == rec.e2) || b->extend(o[i].id());
- UTIL_THROW_IF2(!ok, "Could not extend target phrase.");
- }
- if (s < rec.s2) // shift phrase-internal alignments
- for (size_t k = 1; k < aln.size(); k += 2)
- --aln[k];
- }
- return true;
- }
+ for (size_t i = rec.e1; i <= rec.e2; ++i)
+ {
+ uint64_t tpid = b->getPid();
+ if (find(seen.begin(), seen.end(), tpid) != seen.end())
+ continue; // don't over-count
+ seen.push_back(tpid);
+ size_t raw2 = b->approxOccurrenceCount();
+ m_stats->add(tpid, sample_weight, m_bias ? (*m_bias)[p.sid] : 1,
+ aln, raw2, rec.po_fwd, rec.po_bwd, docid);
+ bool ok = (i == rec.e2) || b->extend(o[i].id());
+ UTIL_THROW_IF2(!ok, "Could not extend target phrase.");
+ }
+ if (s < rec.s2) // shift phrase-internal alignments
+ for (size_t k = 1; k < aln.size(); k += 2)
+ --aln[k];
+ }
+ return true;
+}
- template<typename Token>
- bool
- BitextSampler<Token>::
- operator()()
- {
- if (m_finished) return true;
- boost::unique_lock<boost::mutex> lock(m_lock);
+template<typename Token>
+bool
+BitextSampler<Token>::
+operator()()
+{
+ if (m_finished) return true;
+ boost::unique_lock<boost::mutex> lock(m_lock);
+ if (m_method == ranked_sampling)
perform_ranked_sampling();
- m_finished = true;
- m_ready.notify_all();
- return true;
- }
+ else if (m_method == random_sampling)
+ perform_random_sampling();
+ else UTIL_THROW2("Unsupported sampling method.");
+ m_finished = true;
+ m_ready.notify_all();
+ return true;
+}
- template<typename Token>
- bool
- BitextSampler<Token>::
- done() const
- {
- return m_next == m_stop;
- }
+template<typename Token>
+bool
+BitextSampler<Token>::
+done() const
+{
+ return m_next == m_stop;
+}
- template<typename Token>
- sptr<pstats>
- BitextSampler<Token>::
- stats()
- {
- // if (m_ctr == 0) (*this)();
- // boost::unique_lock<boost::mutex> lock(m_lock);
- // while (!m_finished)
- // m_ready.wait(lock);
- return m_stats;
- }
+template<typename Token>
+sptr<pstats>
+BitextSampler<Token>::
+stats()
+{
+ // if (m_ctr == 0) (*this)();
+ // boost::unique_lock<boost::mutex> lock(m_lock);
+ // while (!m_finished)
+ // m_ready.wait(lock);
+ return m_stats;
+}
- template<typename Token>
- BitextSampler<Token>::
- ~BitextSampler()
- {
- m_stats->release();
- }
+template<typename Token>
+BitextSampler<Token>::
+~BitextSampler()
+{
+ m_stats->release();
+}
} // end of namespace bitext
} // end of namespace Moses
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 3fa76f4d8..651621f96 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -833,17 +833,19 @@ namespace Moses
}
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
- } else if (!ttask->GetContextWeights().empty()) {
- if (m_bias_log)
- {
- *m_bias_log << HERE << endl
- << "BIAS FROM MAP LOOKUP" << endl;
- context->bias_log = m_bias_log;
- }
- context->bias
- = btfix->SetupDocumentBias(ttask->GetContextWeights(), m_bias_log);
- context->bias->loglevel = m_bias_loglevel;
- context->bias->log = m_bias_log;
+ }
+ else if (!ttask->GetContextWeights().empty())
+ {
+ if (m_bias_log)
+ {
+ *m_bias_log << HERE << endl
+ << "BIAS FROM MAP LOOKUP" << endl;
+ context->bias_log = m_bias_log;
+ }
+ context->bias
+ = btfix->SetupDocumentBias(ttask->GetContextWeights(), m_bias_log);
+ context->bias->loglevel = m_bias_loglevel;
+ context->bias->log = m_bias_log;
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
}
@@ -897,13 +899,15 @@ namespace Moses
Mmsapt::
InitializeForInput(ttasksptr const& ttask)
{
- set_bias_for_ranking(ttask, this->btfix);
- // to do: depending on method, set bias for ranking, via consulting the bias
- // server, or none at al.
-
sptr<ContextScope> const& scope = ttask->GetScope();
sptr<ContextForQuery> context = scope->get<ContextForQuery>(btfix.get(), true);
+ // set sampling bias, depending on sampling method specified
+ if (m_sampling_method == ranked_sampling)
+ set_bias_for_ranking(ttask, this->btfix);
+ else if (m_sampling_method == random_sampling)
+ set_bias_via_server(ttask);
+
boost::unique_lock<boost::shared_mutex> mylock(m_lock);
sptr<TPCollCache> localcache = scope->get<TPCollCache>(cache_key);
if (!localcache)