Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/mmsapt.cpp')
-rw-r--r--moses/TranslationModel/UG/mmsapt.cpp72
1 files changed, 66 insertions, 6 deletions
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 024ae44d3..37cf95280 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -70,6 +70,7 @@ namespace Moses
, bias_key(((char*)this)+3)
, cache_key(((char*)this)+2)
, context_key(((char*)this)+1)
+ , m_track_coord(false)
// , m_tpc_ctr(0)
// , m_ifactor(1,0)
// , m_ofactor(1,0)
@@ -275,6 +276,40 @@ namespace Moses
m = param.find("name");
if (m != param.end()) m_name = m->second;
+ // Optional coordinates for training corpus
+ // Takes form coord=name1:file1.gz,name2:file2.gz,...
+ // Names should match with XML input (coord tag)
+ param.insert(pair<string,string>("coord","0"));
+ if(param["coord"] != "0")
+ {
+ m_track_coord = true;
+ vector<string> coord_instances = Tokenize(param["coord"], ",");
+ BOOST_FOREACH(std::string instance, coord_instances)
+ {
+ vector<string> toks = Moses::Tokenize(instance, ":");
+ string space = toks[0];
+ string file = toks[1];
+ // Register that this model uses the given space
+ m_coord_spaces.push_back(StaticData::InstanceNonConst().MapCoordSpace(space));
+ // Load sid coordinates from file
+ m_sid_coord_list.push_back(vector<SPTR<vector<float> > >());
+ vector<SPTR<vector<float> > >& sid_coord = m_sid_coord_list[m_sid_coord_list.size() - 1];
+ //TODO: support extra data for btdyn, here? extra?
+ sid_coord.reserve(btfix->T1->size());
+ string line;
+ cerr << "Loading coordinate lines for space \"" << space << "\" from " << file << endl;
+ iostreams::filtering_istream in;
+ ugdiss::open_input_stream(file, in);
+ while(getline(in, line))
+ {
+ SPTR<vector<float> > coord(new vector<float>);
+ Scan<float>(*coord, Tokenize(line));
+ sid_coord.push_back(coord);
+ }
+ cerr << "Loaded " << sid_coord.size() << " lines" << endl;
+ }
+ }
+
// check for unknown parameters
vector<string> known_parameters; known_parameters.reserve(50);
known_parameters.push_back("L1");
@@ -290,6 +325,7 @@ namespace Moses
known_parameters.push_back("cache");
known_parameters.push_back("coh");
known_parameters.push_back("config");
+ known_parameters.push_back("coord");
known_parameters.push_back("cumb");
known_parameters.push_back("extra");
known_parameters.push_back("feature-sets");
@@ -616,6 +652,29 @@ namespace Moses
}
#endif
+ // Track coordinates if requested
+ if (m_track_coord)
+ {
+ BOOST_FOREACH(uint32_t const sid, *pool.sids)
+ {
+ for(size_t i = 0; i < m_coord_spaces.size(); ++i)
+ {
+ tp->PushCoord(m_coord_spaces[i], m_sid_coord_list[i][sid]);
+ }
+ }
+ /*
+ cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1);
+ BOOST_FOREACH(size_t id, m_coord_spaces)
+ {
+ cerr << " [" << id << "]";
+ vector<vector<float> const*> const* coordList = tp->GetCoordList(id);
+ BOOST_FOREACH(vector<float> const* coord, *coordList)
+ cerr << " : " << Join(" ", *coord);
+ }
+ cerr << endl;
+ */
+ }
+
return tp;
}
@@ -691,7 +750,7 @@ namespace Moses
SPTR<ContextScope> const& scope = ttask->GetScope();
SPTR<TPCollCache> cache = scope->get<TPCollCache>(cache_key);
if (!cache) cache = m_cache; // no context-specific cache, use global one
-
+
ret = cache->get(phrasekey, dyn->revision());
// TO DO: we should revise the revision mechanism: we take the
// length of the dynamic bitext (in sentences) at the time the PT
@@ -705,12 +764,12 @@ namespace Moses
// std::cerr << ret << " with " << ret->refCount << " references at "
// << HERE << std::endl;
boost::upgrade_lock<boost::shared_mutex> rlock(ret->lock);
- if (ret->GetSize()) return ret;
+ if (ret->GetSize()) return ret;
// new TPC (not found or old one was not up to date)
boost::upgrade_to_unique_lock<boost::shared_mutex> wlock(rlock);
// maybe another thread did the work while we waited for the lock ?
- if (ret->GetSize()) return ret;
+ if (ret->GetSize()) return ret;
// OK: pt entry NOT found or NOT up to date
// lookup and expansion could be done in parallel threads,
@@ -730,7 +789,8 @@ namespace Moses
BitextSampler<Token> s(btfix, mfix, context->bias,
m_min_sample_size,
m_default_sample_size,
- m_sampling_method);
+ m_sampling_method,
+ m_track_coord);
s();
sfix = s.stats();
}
@@ -918,7 +978,7 @@ namespace Moses
{
BitextSampler<Token> s(btfix, mfix, context->bias,
m_min_sample_size, m_default_sample_size,
- m_sampling_method);
+ m_sampling_method, m_track_coord);
if (*context->cache1->get(pid, s.stats()) == s.stats())
m_thread_pool->add(s);
}
@@ -939,7 +999,7 @@ namespace Moses
for (size_t i = 0; mdyn.size() == i && i < myphrase.size(); ++i)
mdyn.extend(myphrase[i]);
// let's assume a uniform bias over the foreground corpus
- if (mdyn.size() == myphrase.size()) dyn->prep(ttask, mdyn);
+ if (mdyn.size() == myphrase.size()) dyn->prep(ttask, mdyn, m_track_coord);
}
return mdyn.size() == myphrase.size();
}