Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: Ulrich Germann <Ulrich.Germann@gmail.com> 2015-04-03 18:16:52 +0300
committer: Ulrich Germann <Ulrich.Germann@gmail.com> 2015-04-03 18:16:52 +0300
commit: 93ce2423dfa741225837bb1a180cfcdc672c4d65 (patch)
tree: 55f1735e58abf1fe26bc6e35253fd40ec85f1894 /moses
parent: a0c9d89a42d9260266f4ef9a09552d3bfc93f7f0 (diff)
1. A context string for biased sampling in Mmsapt can now be provided on the command line with --context-string. This is not yet available in server mode.
2. Numerous bug fixes related to biased sampling.
3. Biased sampling now checks that the sampling sticks to the bias. If the distribution of samples deviates too much from the bias, samples whose selection would push the sample distribution even further from the bias are not considered, even if that means that fewer samples are chosen in total.
Diffstat (limited to 'moses')
-rw-r--r-- moses/ExportInterface.cpp | 4
-rw-r--r-- moses/Parameter.cpp | 1
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext.cc | 26
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext.h | 155
-rw-r--r-- moses/TranslationModel/UG/mm/ug_sampling_bias.cc | 3
-rw-r--r-- moses/TranslationModel/UG/mmsapt.cpp | 62
-rw-r--r-- moses/TranslationModel/UG/mmsapt.h | 10
-rw-r--r-- moses/TranslationTask.cpp | 16
-rw-r--r-- moses/TranslationTask.h | 6
9 files changed, 228 insertions, 55 deletions
diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp
index 00850329e..b6a5ec255 100644
--- a/moses/ExportInterface.cpp
+++ b/moses/ExportInterface.cpp
@@ -207,6 +207,9 @@ batch_run()
ThreadPool pool(staticData.ThreadCount());
#endif
+ std::string context_string;
+ params.SetParameter(context_string,"context-string",string(""));
+
// main loop over set of input sentences
boost::shared_ptr<InputType> source;
@@ -219,6 +222,7 @@ batch_run()
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
+ task->SetContextString(context_string);
// Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index e43c69d22..98ed1f439 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -239,6 +239,7 @@ Parameter::Parameter()
AddParam(misc_opts,"feature-name-overwrite", "Override feature name (NOT arguments). Eg. SRILM-->KENLM, PhraseDictionaryMemory-->PhraseDictionaryScope3");
AddParam(misc_opts,"feature", "All the feature functions should be here");
+ AddParam(misc_opts,"context-string", "A (tokenized) string containing context words for context-sensitive translation.");
// Compact phrase table and reordering table.
po::options_description cpt_opts("Options when using compact phrase and reordering tables.");
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index c03909130..29104aaec 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -57,20 +57,27 @@ namespace Moses
bool
pstats::
- add(::uint64_t pid, float const w,
+ add(uint64_t pid, float const w,
vector<uchar> const& a,
uint32_t const cnt2,
uint32_t fwd_o,
- uint32_t bwd_o)
+ uint32_t bwd_o, int const docid)
{
boost::lock_guard<boost::mutex> guard(this->lock);
jstats& entry = this->trg[pid];
- entry.add(w,a,cnt2,fwd_o,bwd_o);
+ entry.add(w,a,cnt2,fwd_o,bwd_o,docid);
if (this->good < entry.rcnt())
{
UTIL_THROW(util::Exception, "more joint counts than good counts:"
<< entry.rcnt() << "/" << this->good << "!");
}
+
+ if (docid >= 0)
+ {
+ while (int(indoc.size()) <= docid) indoc.push_back(0);
+ ++indoc[docid];
+ }
+
return true;
}
@@ -80,7 +87,7 @@ namespace Moses
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
- my_aln.reserve(1);
+ my_aln.reserve(1);
}
jstats::
@@ -89,6 +96,7 @@ namespace Moses
my_rcnt = other.rcnt();
my_wcnt = other.wcnt();
my_aln = other.aln();
+ indoc = other.indoc;
for (int i = 0; i <= Moses::LRModel::NONE; i++)
{
ofwd[i] = other.ofwd[i];
@@ -115,7 +123,7 @@ namespace Moses
void
jstats::
add(float w, vector<uchar> const& a, uint32_t const cnt2,
- uint32_t fwd_orient, uint32_t bwd_orient)
+ uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
{
boost::lock_guard<boost::mutex> lk(this->lock);
my_rcnt += 1;
@@ -135,6 +143,14 @@ namespace Moses
}
++ofwd[fwd_orient];
++obwd[bwd_orient];
+ if (docid >= 0)
+ {
+ while (int(indoc.size()) <= docid) indoc.push_back(0);
+ ++indoc[docid];
+
+ // cout << docid << " => " << indoc[docid] << " " << HERE << endl;
+
+ }
}
uint32_t
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index 6387ba547..7354bf738 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -31,6 +31,7 @@
#include <boost/thread.hpp>
#include <boost/random.hpp>
#include <boost/format.hpp>
+#include <boost/math/distributions/binomial.hpp>
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
@@ -101,6 +102,7 @@ namespace Moses {
vector<pair<size_t, vector<uchar> > > my_aln;
uint32_t ofwd[Moses::LRModel::NONE+1], obwd[Moses::LRModel::NONE+1];
public:
+ vector<uint32_t> indoc;
jstats();
jstats(jstats const& other);
uint32_t rcnt() const;
@@ -109,7 +111,7 @@ namespace Moses {
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
- uint32_t fwd_orient, uint32_t bwd_orient);
+ uint32_t fwd_orient, uint32_t bwd_orient, int const docid);
void invalidate();
void validate();
bool valid();
@@ -138,6 +140,9 @@ namespace Moses {
// size_t Moses::LRModel::ReorderingType
uint32_t ofwd[Moses::LRModel::NONE+1], obwd[Moses::LRModel::NONE+1];
+
+ vector<uint32_t> indoc;
+
// typedef typename boost::unordered_map<typename uint64_t, jstats> trg_map_t;
typedef std::map<uint64_t, jstats> trg_map_t;
@@ -153,7 +158,7 @@ namespace Moses {
float const w,
vector<uchar> const& a,
uint32_t const cnt2,
- uint32_t fwd_o, uint32_t bwd_o);
+ uint32_t fwd_o, uint32_t bwd_o, int const docid);
};
struct
@@ -165,6 +170,8 @@ namespace Moses {
boost::shared_mutex lock;
sptr<SamplingBias> bias;
sptr<pstats::cache_t> cache1, cache2;
+ ostream* bias_log;
+ ContextForQuery() : bias_log(NULL) { }
};
template<typename Token>
@@ -200,6 +207,7 @@ namespace Moses {
vector<uchar> aln;
float score;
bool inverse;
+ vector<uint32_t> indoc;
PhrasePair() { };
PhrasePair(PhrasePair const& o);
@@ -241,6 +249,13 @@ namespace Moses {
int cmp(PhrasePair const& a, PhrasePair const& b) const;
bool operator()(PhrasePair const& a, PhrasePair const& b) const;
};
+
+ class SortDescendingByJointCount
+ {
+ public:
+ int cmp(PhrasePair const& a, PhrasePair const& b) const;
+ bool operator()(PhrasePair const& a, PhrasePair const& b) const;
+ };
};
template<typename Token>
@@ -296,6 +311,7 @@ namespace Moses {
dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
}
+ indoc = js.indoc;
return *this;
}
@@ -358,6 +374,7 @@ namespace Moses {
, aln(o.aln)
, score(o.score)
, inverse(o.inverse)
+ , indoc(o.indoc)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
{
@@ -397,6 +414,26 @@ namespace Moses {
}
template<typename Token>
+ int
+ PhrasePair<Token>::
+ SortDescendingByJointCount::
+ cmp(PhrasePair const& a, PhrasePair const& b) const
+ {
+ size_t i = 0;
+ if (a.joint == b.joint) return 0;
+ return a.joint > b.joint ? -1 : 1;
+ }
+
+ template<typename Token>
+ bool
+ PhrasePair<Token>::
+ SortDescendingByJointCount::
+ operator()(PhrasePair const& a, PhrasePair const& b) const
+ {
+ return this->cmp(a,b) < 0;
+ }
+
+ template<typename Token>
void
PhrasePair<Token>::
init()
@@ -414,7 +451,6 @@ namespace Moses {
protected:
mutable boost::shared_mutex m_lock;
-
public:
typedef TKN Token;
typedef typename TSA<Token>::tree_iterator iter;
@@ -621,6 +657,7 @@ namespace Moses {
#if UG_BITEXT_TRACK_ACTIVE_THREADS
static ThreadSafeCounter active;
#endif
+ Bitext<Token> const* const m_bitext;
boost::mutex lock;
friend class agenda;
boost::taus88 rnd; // every job has its own pseudo random generator
@@ -642,7 +679,8 @@ namespace Moses {
float bias_total;
bool step(uint64_t & sid, uint64_t & offset); // select another occurrence
bool done() const;
- job(typename TSA<Token>::tree_iterator const& m,
+ job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& m,
sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd,
sptr<SamplingBias const> const& bias);
~job();
@@ -668,7 +706,8 @@ namespace Moses {
void add_workers(int n);
sptr<pstats>
- add_job(typename TSA<Token>::tree_iterator const& phrase,
+ add_job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias);
sptr<job> get_job();
@@ -700,6 +739,20 @@ namespace Moses {
{
next = root->readSid(next,stop,sid);
next = root->readOffset(next,stop,offset);
+ if (m_bias)
+ {
+ id_type docid = m_bias->GetClass(sid);
+ if (stats->indoc.size() > docid)
+ {
+ uint32_t N = stats->good;
+ float k = min(stats->indoc[docid],N);
+ float p = (*m_bias)[sid];
+
+ typedef boost::math::binomial_distribution<> binomial;
+ using namespace boost::math;
+ if (cdf(complement(binomial(N+1, p), k)) < .05) continue;
+ }
+ }
{ // brackets required for lock scoping;
// see sguard immediately below
boost::lock_guard<boost::mutex> sguard(stats->lock);
@@ -707,7 +760,7 @@ namespace Moses {
size_t scalefac = (stats->raw_cnt - ctr++);
size_t rnum = scalefac * (rnd()/(rnd.max()+1.));
size_t th = (bias_total
- ? ((*m_bias)[sid]/bias_total * m_bias->size()
+ ? ((*m_bias)[sid]/bias_total * stats->raw_cnt
* max_samples)
: max_samples);
#if 0
@@ -717,10 +770,19 @@ namespace Moses {
<< " th=" << th;
if (m_bias)
cerr << " with bias " << (*m_bias)[sid]
- << " => " << (*m_bias)[sid] * m_bias->size();
+ << " => " << th;
else cerr << " without bias";
cerr << endl;
#endif
+#if 0
+ cerr << "bias total: " << bias_total
+ << " bias local: " << (*m_bias)[sid]
+ << " rnum: " << rnum
+ << " good: " << stats->good
+ << " th: " << th
+ << " raw: " << stats->raw_cnt
+ << endl;
+#endif
if (rnum + stats->good < th)
{
stats->sample_cnt++;
@@ -792,6 +854,15 @@ namespace Moses {
bitvector full_alignment(100*100);
while (j->step(sid,offset))
{
+ int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1;
+
+ Token const* t = ag.bt.T2->sntStart(sid);
+ Token const* eos = ag.bt.T2->sntEnd(sid);
+#if 0
+ cerr << "[" << j->stats->good + 1 << "] ";
+ while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
+ cerr << "[" << docid << "]" << endl;
+#endif
aln.clear();
int po_fwd=Moses::LRModel::NONE,po_bwd=Moses::LRModel::NONE;
if (j->fwd)
@@ -855,7 +926,7 @@ namespace Moses {
seen.push_back(tpid);
if (! j->stats->add(tpid,sample_weight,aln,
b->approxOccurrenceCount(),
- po_fwd,po_bwd))
+ po_fwd,po_bwd,docid))
{
cerr << "FATAL ERROR AT " << __FILE__
<< ":" << __LINE__ << endl;
@@ -913,12 +984,14 @@ namespace Moses {
Bitext<Token>::
agenda::
job::
- job(typename TSA<Token>::tree_iterator const& m,
+ job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& m,
sptr<TSA<Token> > const& r, size_t maxsmpl,
bool isfwd, sptr<SamplingBias const> const& bias)
- : rnd(0)
+ : m_bitext(theBitext)
+ , rnd(0)
, rnddenom(rnd.max() + 1.)
- , min_diverse(10)
+ , min_diverse(1)
, workers(0)
, root(r)
, next(m.lower_bound(-1))
@@ -937,12 +1010,20 @@ namespace Moses {
// Profiling question: how much does that cost us?
if (m_bias)
{
+ int ctr = 0;
+ stats->raw_cnt = 0;
for (char const* x = m.lower_bound(-1); x < stop;)
{
uint32_t sid; ushort offset;
- next = root->readSid(next,stop,sid);
- next = root->readOffset(next,stop,offset);
+ x = root->readSid(x,stop,sid);
+ x = root->readOffset(x,stop,offset);
+#if 0
+ cerr << ctr++ << " " << m.str(m_bitext->V1.get())
+ << " " << sid << "/" << root->getCorpusSize()
+ << " " << offset << " " << stop-x << endl;
+#endif
bias_total += (*m_bias)[sid];
+ ++stats->raw_cnt;
}
}
#if UG_BITEXT_TRACK_ACTIVE_THREADS
@@ -956,13 +1037,15 @@ namespace Moses {
sptr<pstats>
Bitext<Token>::
agenda::
- add_job(typename TSA<Token>::tree_iterator const& phrase,
+ add_job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias)
{
boost::unique_lock<boost::mutex> lk(this->lock);
static boost::posix_time::time_duration nodelay(0,0,0,0);
bool fwd = phrase.root == bt.I1.get();
- sptr<job> j(new job(phrase, fwd ? bt.I1 : bt.I2, max_samples, fwd, bias));
+ sptr<job> j(new job(theBitext, phrase, fwd ? bt.I1 : bt.I2,
+ max_samples, fwd, bias));
j->stats->register_worker();
joblist.push_back(j);
@@ -1071,8 +1154,8 @@ namespace Moses {
size_t docid = this->m_docname2docid.size();
this->m_docname2docid[docname] = docid;
line >> b;
- cerr << "DOCUMENT MAP " << docname
- << " " << a << "-" << b+a << endl;
+ VERBOSE(1, "DOCUMENT MAP " << docname
+ << " " << a << "-" << b+a << endl);
for (b += a; a < b; ++a)
(*this->m_sid2docid)[a] = docid;
}
@@ -1507,6 +1590,7 @@ namespace Moses {
cache = (phrase.root == I1.get()
? (bias ? context->cache1 : m_cache1)
: (bias ? context->cache2 : m_cache2));
+ // if (bias) cerr << "Using bias." << endl;
}
sptr<pstats> ret;
sptr<pstats> const* cached;
@@ -1523,7 +1607,7 @@ namespace Moses {
// cerr << "NEW FREQUENT PHRASE: "
// << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
// << " at " << __FILE__ << ":" << __LINE__ << endl;
- ret = ag->add_job(phrase, max_sample, bias);
+ ret = ag->add_job(this, phrase, max_sample, bias);
if (cache) cache->set(phrase.getPid(),ret);
UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
return ret;
@@ -1734,14 +1818,15 @@ namespace Moses {
template<typename Token>
void
expand(typename Bitext<Token>::iter const& m,
- Bitext<Token> const& bt,
- pstats const& ps, vector<PhrasePair<Token> >& dest)
+ Bitext<Token> const& bt, pstats const& ps,
+ vector<PhrasePair<Token> >& dest, ostream* log)
{
bool fwd = m.root == bt.I1.get();
dest.reserve(ps.trg.size());
PhrasePair<Token> pp;
pp.init(m.getPid(), !fwd, m.getToken(0), m.size(), &ps, 0);
- // cout << HERE << " " << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
+ // cout << HERE << " "
+ // << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
pstats::trg_map_t::const_iterator a;
for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
{
@@ -1751,13 +1836,27 @@ namespace Moses {
len, a->second);
dest.push_back(pp);
}
-#if 0
- typename PhrasePair<Token>::SortByTargetIdSeq sorter;
- sort(dest.begin(), dest.end(),sorter);
- BOOST_FOREACH(PhrasePair<Token> const& p, dest)
- cout << toString (*(fwd ? bt.V1 : bt.V2),p.start1,p.len1) << " ::: "
- << toString (*(fwd ? bt.V2 : bt.V1),p.start2,p.len2) << " "
- << p.joint << endl;
+#if 1
+ if (log)
+ {
+ // typename PhrasePair<Token>::SortByTargetIdSeq sorter;
+ typename PhrasePair<Token>::SortDescendingByJointCount sorter;
+ sort(dest.begin(), dest.end(),sorter);
+ for (size_t i = 0; i < dest.size(); ++i)
+ {
+ PhrasePair<Token> const& p = dest[i];
+ if (i && p.joint <= 1) break;
+ *log << toString (*(fwd ? bt.V1 : bt.V2),p.start1,p.len1) << " ::: "
+ << toString (*(fwd ? bt.V2 : bt.V1),p.start2,p.len2) << " "
+ << p.joint << " [";
+ for (size_t i = 0; i < p.indoc.size(); ++i)
+ {
+ if (i) cout << " ";
+ *log << p.indoc[i];
+ }
+ *log << "]" << endl;
+ }
+ }
#endif
}
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
index d0f03852f..ae0c85baf 100644
--- a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
@@ -118,7 +118,8 @@ namespace Moses
DocumentBias
::operator[](id_type const idx) const
{
- UTIL_THROW_IF2(idx >= m_sid2docid.size(), "Out of bounds");
+ UTIL_THROW_IF2(idx >= m_sid2docid.size(),
+ "Out of bounds: " << idx << "/" << m_sid2docid.size());
return m_bias[m_sid2docid[idx]];
}
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 9594076fc..413cb6eec 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -54,7 +54,7 @@ namespace Moses
#if 0
Mmsapt::
Mmsapt(string const& description, string const& line)
- : PhraseDictionary(description,line), ofactor(1,0)
+ : PhraseDictionary(description,line), ofactor(1,0), m_bias_log(NULL)
{
this->init(line);
}
@@ -72,8 +72,9 @@ namespace Moses
: PhraseDictionary(line)
, ofactor(1,0)
// , m_tpc_ctr(0)
- , m_context_key(((char*)this)+1)
- , m_cache_key(((char*)this)+2)
+ , context_key(((char*)this)+1)
+ , cache_key(((char*)this)+2)
+ , m_bias_log(NULL)
{
this->init(line);
}
@@ -193,6 +194,20 @@ namespace Moses
if ((m = param.find("bias-server")) != param.end())
m_bias_server = m->second;
+ if ((m = param.find("bias-logfile")) != param.end())
+ {
+ m_bias_logfile = m->second;
+ if (m_bias_logfile == "/dev/stderr")
+ m_bias_log = &std::cerr;
+ else if (m_bias_logfile == "/dev/stdout")
+ m_bias_log = &std::cout;
+ else
+ {
+ m_bias_logger.reset(new ofstream(m_bias_logfile.c_str()));
+ m_bias_log = m_bias_logger.get();
+ }
+ }
+
if ((m = param.find("extra")) != param.end())
m_extra_data = m->second;
@@ -214,6 +229,7 @@ namespace Moses
known_parameters.push_back("base"); // alias for path
known_parameters.push_back("bias");
known_parameters.push_back("bias-server");
+ known_parameters.push_back("bias-logfile");
known_parameters.push_back("cache");
known_parameters.push_back("coh");
known_parameters.push_back("config");
@@ -258,6 +274,7 @@ namespace Moses
Mmsapt::
load_extra_data(string bname, bool locking = true)
{
+ using namespace boost;
// TO DO: ADD CHECKS FOR ROBUSTNESS
// - file existence?
// - same number of lines?
@@ -274,8 +291,8 @@ namespace Moses
while(getline(in2,line)) text2.push_back(line);
while(getline(ina,line)) symal.push_back(line);
- boost::scoped_ptr<boost::unique_lock<boost::shared_mutex> > guard;
- if (locking) guard.reset(new boost::unique_lock<boost::shared_mutex>(m_lock));
+ scoped_ptr<unique_lock<shared_mutex> > guard;
+ if (locking) guard.reset(new unique_lock<shared_mutex>(m_lock));
btdyn = btdyn->add(text1,text2,symal);
assert(btdyn);
cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
@@ -310,7 +327,8 @@ namespace Moses
template<typename fftype>
void
Mmsapt::
- check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry)
+ check_ff(string const ffname, float const xtra,
+ vector<sptr<pscorer> >* registry)
{
string const& spec = param[ffname];
if (spec == "" || spec == "0") return;
@@ -585,7 +603,7 @@ namespace Moses
// get context-specific cache of items previously looked up
sptr<ContextScope> const& scope = ttask->GetScope();
- sptr<TPCollCache> cache = scope->get<TPCollCache>(m_cache_key);
+ sptr<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
TPCollWrapper* ret = cache->get(phrasekey, dyn->revision());
// TO DO: we should revise the revision mechanism: we take the length
// of the dynamic bitext (in sentences) at the time the PT entry
@@ -612,12 +630,12 @@ namespace Moses
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
if (sfix)
{
- expand(mfix, btfix, *sfix, ppfix);
+ expand(mfix, btfix, *sfix, ppfix, m_bias_log);
sort(ppfix.begin(), ppfix.end(),sort_by_tgt_id);
}
if (sdyn)
{
- expand(mdyn, *dyn, *sdyn, ppdyn);
+ expand(mdyn, *dyn, *sdyn, ppdyn, m_bias_log);
sort(ppdyn.begin(), ppdyn.end(),sort_by_tgt_id);
}
@@ -699,24 +717,34 @@ namespace Moses
InitializeForInput(ttasksptr const& ttask)
{
sptr<ContextScope> const& scope = ttask->GetScope();
- sptr<ContextForQuery> context = scope->get<ContextForQuery>(m_context_key, true);
+ sptr<ContextForQuery> context
+ = scope->get<ContextForQuery>(&btfix, true);
if (m_bias_server.size() && context->bias == NULL)
{ // we need to create the bias
boost::unique_lock<boost::shared_mutex> lock(context->lock);
- string line = ttask->GetSource()->ToString();
- cerr << HERE << endl;
- cerr << "BIAS LOOKUP CONTEXT: " << line << endl;
- context->bias = btfix.SetupDocumentBias(m_bias_server, line);
+ string const& context_words = ttask->GetContextString();
+ if (context_words.size())
+ {
+ if (m_bias_log)
+ {
+ *m_bias_log << HERE << endl
+ << "BIAS LOOKUP CONTEXT: "
+ << context_words << endl;
+ context->bias_log = m_bias_log;
+ }
+ context->bias
+ = btfix.SetupDocumentBias(m_bias_server, context_words);
+ }
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
}
boost::unique_lock<boost::shared_mutex> mylock(m_lock);
- sptr<TPCollCache> localcache = scope->get<TPCollCache>(m_cache_key);
+ sptr<TPCollCache> localcache = scope->get<TPCollCache>(cache_key);
if (!localcache)
{
if (context->bias) localcache.reset(new TPCollCache(m_cache_size));
else localcache = m_cache;
- scope->set<TPCollCache>(m_cache_key, localcache);
+ scope->set<TPCollCache>(cache_key, localcache);
}
}
@@ -764,7 +792,7 @@ namespace Moses
Mmsapt
::Release(ttasksptr const& ttask, TargetPhraseCollection*& tpc) const
{
- sptr<TPCollCache> cache = ttask->GetScope()->get<TPCollCache>(m_cache_key);
+ sptr<TPCollCache> cache = ttask->GetScope()->get<TPCollCache>(cache_key);
TPCollWrapper* foo = static_cast<TPCollWrapper*>(tpc);
if (cache) cache->release(foo);
tpc = NULL;
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index 5e90ca2db..2f04f8d4e 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -73,9 +73,13 @@ namespace Moses
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
std::vector<std::string> m_feature_set_names; // one or more of: standard, datasource
-
- void* const m_cache_key; // for getting cache from ttask
- void* const m_context_key; // for context scope from ttask
+ std::string m_bias_logfile;
+ boost::scoped_ptr<ofstream> m_bias_logger; // for logging to a file
+ ostream* m_bias_log;
+ public:
+ void* const cache_key; // for getting cache from ttask
+ void* const context_key; // for context scope from ttask
+ private:
boost::shared_ptr<SamplingBias> m_bias; // for global default bias
boost::shared_ptr<TPCollCache> m_cache; // for global default bias
size_t m_cache_size; //
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 92bf12156..b8ad55978 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -23,6 +23,22 @@ using namespace std;
namespace Moses
{
+std::string const&
+TranslationTask
+::GetContextString() const
+{
+ return m_context_string;
+}
+
+void
+TranslationTask
+::SetContextString(std::string const& context)
+{
+ m_context_string = context;
+}
+
+
+
boost::shared_ptr<TranslationTask>
TranslationTask
::create(boost::shared_ptr<InputType> const& source)
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index c51750bdd..6679cb9b3 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -63,7 +63,8 @@ protected:
// function can determine (via a check on the weak pointer) if the
// task is still live or not, or maintain a shared_ptr to ensure the
// task stays alive till it's done with it.
-
+
+ std::string m_context_string;
public:
boost::shared_ptr<TranslationTask>
@@ -104,6 +105,9 @@ public:
return m_scope;
}
+ std::string const& GetContextString() const;
+ void SetContextString(std::string const& context);
+
protected:
boost::shared_ptr<Moses::InputType> m_source;
boost::shared_ptr<Moses::IOWrapper> m_ioWrapper;