Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author Michael Denkowski <mdenkows@amazon.com> 2016-08-08 17:33:24 +0300
committer Michael Denkowski <mdenkows@amazon.com> 2016-08-12 13:05:12 +0300
commit ae1e51d81ad450f7ee497386eea16ebe3792f68b (patch)
tree baa538edd1b5b92bb7e08e3ece5a1face44483d5 /moses
parent d29916bbb3973b18acd80b1ef19841399f7247b0 (diff)
Support storing coordinates of target phrase
- Keep track of named spaces in StaticData
- Adding coords to phrases implemented for Mmsapt
Diffstat (limited to 'moses')
-rw-r--r-- moses/InputType.h 9
-rw-r--r-- moses/StaticData.cpp 21
-rw-r--r-- moses/StaticData.h 11
-rw-r--r-- moses/TargetPhrase.cpp 23
-rw-r--r-- moses/TargetPhrase.h 7
-rw-r--r-- moses/TranslationModel/PhraseDictionary.h 11
-rw-r--r-- moses/TranslationModel/UG/mmsapt.cpp 27
-rw-r--r-- moses/TranslationModel/UG/mmsapt.h 4
-rw-r--r-- moses/XmlOption.cpp 34
9 files changed, 95 insertions, 52 deletions
diff --git a/moses/InputType.h b/moses/InputType.h
index e2dce5147..ce25cbc96 100644
--- a/moses/InputType.h
+++ b/moses/InputType.h
@@ -68,13 +68,8 @@ public:
size_t m_frontSpanCoveredLength;
// how many words from the beginning are covered
- // Coordinates in user-defined spaces, indexed by phrase dictionary pointer
- // Looking up PD* returns a vector of the input's coordinates in each space
- // known to the PD, in order (vector of pointers to float vectors). This
- // allows different models to use different subsets of all named spaces.
- typedef std::vector<boost::shared_ptr<std::vector<float> > > INCOORD;
- typedef std::map<PhraseDictionary const*, INCOORD> PD2IC;
- boost::shared_ptr<PD2IC> m_pd2InputCoord;
+ // Coordinates in user-defined spaces (see "coord" XML tag)
+ SPTR<std::map<size_t const, std::vector<float> > > m_coordMap;
InputType(AllOptions::ptr const& opts, long translationId = 0);
virtual ~InputType();
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 9ea88c97e..c80cc54ab 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -936,4 +936,25 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
}
}
+size_t StaticData::GetCoordSpace(string space) const
+{
+ map<string, size_t>::const_iterator m = m_coordSpaceMap.find(space);
+ if(m == m_coordSpaceMap.end()) {
+ return 0;
+ }
+ return m->second;
+}
+
+size_t StaticData::MapCoordSpace(string space)
+{
+ map<string, size_t>::const_iterator m = m_coordSpaceMap.find(space);
+ if (m != m_coordSpaceMap.end()) {
+ return m->second;
+ }
+ size_t id = m_coordSpaceNextID;
+ m_coordSpaceNextID += 1;
+ m_coordSpaceMap[space] = id;
+ return id;
+}
+
} // namespace
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 871b82641..88996a6f3 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -60,7 +60,7 @@ class PhraseDictionaryDynamicCacheBased;
typedef std::pair<std::string, float> UnknownLHSEntry;
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
-/** Contains global variables and contants.
+/** Contains global variables and constants.
* Only 1 object of this class should be instantiated.
* A const object of this class is accessible by any function during decoding by calling StaticData::Instance();
*/
@@ -152,6 +152,12 @@ protected:
bool ini_performance_options();
void initialize_features();
+
+ // Coordinate space name map for matching spaces across XML input ("coord"
+ // tag) and feature functions that assign or use coordinates on target phrases
+ std::map< std::string const, size_t > m_coordSpaceMap;
+ size_t m_coordSpaceNextID = 1;
+
public:
//! destructor
@@ -394,6 +400,9 @@ public:
return m_requireSortingAfterSourceContext;
}
+ // Coordinate spaces
+ size_t GetCoordSpace(std::string space) const;
+ size_t MapCoordSpace(std::string space);
};
}
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 89575c462..35a139917 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -333,6 +333,29 @@ SetExtraScores(FeatureFunction const* ff,
m_cached_scores[ff] = s;
}
+vector<vector<float> const*> const&
+TargetPhrase::
+GetCoordList(size_t const spaceID) const
+{
+ UTIL_THROW_IF2(!m_cached_coord,
+ "No coordinates known for target phrase");
+ CoordCache_t::const_iterator m = m_cached_coord->find(spaceID);
+ UTIL_THROW_IF2(m == m_cached_coord->end(),
+ "No coordinates known in given space for target phrase");
+ return m->second;
+}
+
+void
+TargetPhrase::
+PushCoord(size_t const spaceID,
+ vector<float> const* coord)
+{
+ if (!m_cached_coord) {
+ m_cached_coord.reset(new CoordCache_t);
+ }
+ vector<vector<float> const *>& coordList = (*m_cached_coord)[spaceID];
+ coordList.push_back(coord);
+}
void TargetPhrase::SetProperties(const StringPiece &str)
{
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index a2772cdae..f0b312628 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -56,9 +56,16 @@ public:
Scores const* GetExtraScores(FeatureFunction const* ff) const;
void SetExtraScores(FeatureFunction const* ff,boost::shared_ptr<Scores> const& scores);
+ typedef std::map<size_t const, std::vector<std::vector<float> const*> > CoordCache_t;
+ std::vector<std::vector<float> const*> const& GetCoordList(size_t const spaceID) const;
+ void PushCoord(size_t const spaceID, std::vector<float> const* coord);
private:
ScoreCache_t m_cached_scores;
+ // The coordinate cache stores vectors of pointers to vectors. The coordinate
+ // vectors referenced by the pointers should be owned by the phrase dictionary
+ // implementation.
+ SPTR<CoordCache_t> m_cached_coord;
WPTR<ContextScope> m_scope;
private:
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index bc53221e9..33a128638 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -147,14 +147,6 @@ public:
void SetParameter(const std::string& key, const std::string& value);
- void AddKnownSpace(const std::string& name) {
- m_knownSpaces.push_back(name);
- }
-
- const std::vector<std::string> &GetKnownSpaces() const {
- return m_knownSpaces;
- }
-
// LEGACY
//! find list of translations that can translates a portion of src. Used by confusion network decoding
virtual
@@ -179,9 +171,6 @@ protected:
// cache
size_t m_maxCacheSize; // 0 = no caching
- // Named coordinate spaces used by this model, in order (see "coord" XML tag)
- std::vector<std::string> m_knownSpaces;
-
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::thread_specific_ptr<CacheColl> m_cache;
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 634cdc539..a8b577845 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -286,16 +286,17 @@ namespace Moses
BOOST_FOREACH(std::string instance, coord_instances)
{
vector<string> toks = Moses::Tokenize(instance, ":");
- string name = toks[0];
+ string space = toks[0];
string file = toks[1];
- //TODO: register this space for this model
+ // Register that this model uses the given space
+ m_coord_spaces.push_back(StaticData::InstanceNonConst().MapCoordSpace(space));
// Load sid coordinates from file
m_sid_coord_list.push_back(vector<vector<float> >());
vector<vector<float> >& sid_coord = m_sid_coord_list[m_sid_coord_list.size() - 1];
//TODO: support extra data for btdyn, here? extra?
sid_coord.reserve(btfix->T1->size());
string line;
- cerr << "Loading coordinate lines for space \"" << name << "\" from " << file << endl;
+ cerr << "Loading coordinate lines for space \"" << space << "\" from " << file << endl;
iostreams::filtering_istream in;
ugdiss::open_input_stream(file, in);
while(getline(in, line))
@@ -648,19 +649,27 @@ namespace Moses
}
#endif
- // Track stats for rescoring non-cacheable phrases as needed
+ // Track coordinates if requested
if (m_track_coord)
{
- cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1) << endl;
BOOST_FOREACH(uint32_t const sid, *pool.sids)
{
- BOOST_FOREACH(vector<vector<float> > coord, m_sid_coord_list)
+ for(size_t i = 0; i < m_coord_spaces.size(); ++i)
{
- //TODO: store coord[sid] in tp
- cerr << " : " << Join(" ", coord[sid]);
+ tp->PushCoord(m_coord_spaces[i], &m_sid_coord_list[i][sid]);
}
- cerr << endl;
}
+ /*
+ cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1);
+ BOOST_FOREACH(size_t id, m_coord_spaces)
+ {
+ cerr << " [" << id << "]";
+ vector<vector<float> const*> const& coordList = tp->GetCoordList(id);
+ BOOST_FOREACH(vector<float> const* coord, coordList)
+ cerr << " : " << Join(" ", *coord);
+ }
+ cerr << endl;
+ */
}
return tp;
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index a26e4fa2e..da7a1ef5a 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -119,8 +119,10 @@ namespace Moses
std::vector<SPTR<pscorer > > m_active_ff_common;
// activated feature functions (dyn)
- bool m_track_coord = false; // track coordinates? Effectively: track sids when sampling bitext?
+ bool m_track_coord = false; // track coordinates? Track sids when sampling
+ // from bitext, append coords to target phrases
std::vector<std::vector<std::vector<float> > > m_sid_coord_list;
+ std::vector<size_t> m_coord_spaces;
void
parse_factor_spec(std::vector<FactorType>& flist, std::string const key);
diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp
index 3e367aa9e..b7969ae51 100644
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@@ -405,33 +405,21 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line,
// Coord: coordinates of the input sentence in a user-defined space
// <coord space="NAME" coord="X Y Z ..." />
// where NAME is the name of the space and X Y Z ... are floats. See
- // PScoreDist in PhraseDictionaryBitextSampling (Mmsapt) for an example
- // of using this information for feature scoring.
+ // TODO for an example of using this information for feature scoring.
else if (tagName == "coord") {
// Parse tag
string space = ParseXmlTagAttribute(tagContent, "space");
- vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent, "coord"));
- boost::shared_ptr<vector<float> > coord(new vector<float>);
- Scan<float>(*coord, toks);
- // Init if needed
- if (!input.m_pd2InputCoord) {
- input.m_pd2InputCoord.reset(new std::map<PhraseDictionary const*, std::vector<boost::shared_ptr<std::vector<float> > > >);
- }
- // Scan phrase dictionaries to see which (if any) use this space
- BOOST_FOREACH(PhraseDictionary const* pd, PhraseDictionary::GetColl()) {
- const vector<string>& pdKnownSpaces = pd->GetKnownSpaces();
- for (size_t i = 0; i < pdKnownSpaces.size(); ++i) {
- // Match
- if (pdKnownSpaces[i] == space) {
- // Make sure a slot to store the coordinates exists
- std::vector<boost::shared_ptr<std::vector<float> > >& inputCoord = (*input.m_pd2InputCoord)[pd];
- if (inputCoord.size() < i + 1) {
- inputCoord.resize(i + 1);
- }
- // Store
- inputCoord[i] = coord;
- }
+ vector<string> tok = Tokenize(ParseXmlTagAttribute(tagContent, "coord"));
+ size_t id = StaticData::Instance().GetCoordSpace(space);
+ if (!id) {
+ TRACE_ERR("ERROR: no models use space " << space << ", will be ignored" << endl);
+ } else {
+ // Init if needed
+ if (!input.m_coordMap) {
+ input.m_coordMap.reset(new std::map<size_t const, std::vector<float> >);
}
+ vector<float>& coord = (*input.m_coordMap)[id];
+ Scan<float>(coord, tok);
}
}