diff options
author | Michael Denkowski <mdenkows@amazon.com> | 2016-08-08 17:33:24 +0300 |
---|---|---|
committer | Michael Denkowski <mdenkows@amazon.com> | 2016-08-12 13:05:12 +0300 |
commit | ae1e51d81ad450f7ee497386eea16ebe3792f68b (patch) | |
tree | baa538edd1b5b92bb7e08e3ece5a1face44483d5 /moses | |
parent | d29916bbb3973b18acd80b1ef19841399f7247b0 (diff) |
Support storing coordinates of target phrase
- Keep track of named spaces in StaticData
- Adding coords to phrases is implemented for Mmsapt
Diffstat (limited to 'moses')
-rw-r--r-- | moses/InputType.h | 9 | ||||
-rw-r--r-- | moses/StaticData.cpp | 21 | ||||
-rw-r--r-- | moses/StaticData.h | 11 | ||||
-rw-r--r-- | moses/TargetPhrase.cpp | 23 | ||||
-rw-r--r-- | moses/TargetPhrase.h | 7 | ||||
-rw-r--r-- | moses/TranslationModel/PhraseDictionary.h | 11 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mmsapt.cpp | 27 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mmsapt.h | 4 | ||||
-rw-r--r-- | moses/XmlOption.cpp | 34 |
9 files changed, 95 insertions, 52 deletions
diff --git a/moses/InputType.h b/moses/InputType.h index e2dce5147..ce25cbc96 100644 --- a/moses/InputType.h +++ b/moses/InputType.h @@ -68,13 +68,8 @@ public: size_t m_frontSpanCoveredLength; // how many words from the beginning are covered - // Coordinates in user-defined spaces, indexed by phrase dictionary pointer - // Looking up PD* returns a vector of the input's coordinates in each space - // known to the PD, in order (vector of pointers to float vectors). This - // allows different models to use different subsets of all named spaces. - typedef std::vector<boost::shared_ptr<std::vector<float> > > INCOORD; - typedef std::map<PhraseDictionary const*, INCOORD> PD2IC; - boost::shared_ptr<PD2IC> m_pd2InputCoord; + // Coordinates in user-defined spaces (see "coord" XML tag) + SPTR<std::map<size_t const, std::vector<float> > > m_coordMap; InputType(AllOptions::ptr const& opts, long translationId = 0); virtual ~InputType(); diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 9ea88c97e..c80cc54ab 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -936,4 +936,25 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string } } +size_t StaticData::GetCoordSpace(string space) const +{ + map<string, size_t>::const_iterator m = m_coordSpaceMap.find(space); + if(m == m_coordSpaceMap.end()) { + return 0; + } + return m->second; +} + +size_t StaticData::MapCoordSpace(string space) +{ + map<string, size_t>::const_iterator m = m_coordSpaceMap.find(space); + if (m != m_coordSpaceMap.end()) { + return m->second; + } + size_t id = m_coordSpaceNextID; + m_coordSpaceNextID += 1; + m_coordSpaceMap[space] = id; + return id; +} + } // namespace diff --git a/moses/StaticData.h b/moses/StaticData.h index 871b82641..88996a6f3 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -60,7 +60,7 @@ class PhraseDictionaryDynamicCacheBased; typedef std::pair<std::string, float> UnknownLHSEntry; typedef std::vector<UnknownLHSEntry> 
UnknownLHSList; -/** Contains global variables and contants. +/** Contains global variables and constants. * Only 1 object of this class should be instantiated. * A const object of this class is accessible by any function during decoding by calling StaticData::Instance(); */ @@ -152,6 +152,12 @@ protected: bool ini_performance_options(); void initialize_features(); + + // Coordinate space name map for matching spaces across XML input ("coord" + // tag) and feature functions that assign or use coordinates on target phrases + std::map< std::string const, size_t > m_coordSpaceMap; + size_t m_coordSpaceNextID = 1; + public: //! destructor @@ -394,6 +400,9 @@ public: return m_requireSortingAfterSourceContext; } + // Coordinate spaces + size_t GetCoordSpace(std::string space) const; + size_t MapCoordSpace(std::string space); }; } diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index 89575c462..35a139917 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -333,6 +333,29 @@ SetExtraScores(FeatureFunction const* ff, m_cached_scores[ff] = s; } +vector<vector<float> const*> const& +TargetPhrase:: +GetCoordList(size_t const spaceID) const +{ + UTIL_THROW_IF2(!m_cached_coord, + "No coordinates known for target phrase"); + CoordCache_t::const_iterator m = m_cached_coord->find(spaceID); + UTIL_THROW_IF2(m == m_cached_coord->end(), + "No coordinates known in given space for target phrase"); + return m->second; +} + +void +TargetPhrase:: +PushCoord(size_t const spaceID, + vector<float> const* coord) +{ + if (!m_cached_coord) { + m_cached_coord.reset(new CoordCache_t); + } + vector<vector<float> const *>& coordList = (*m_cached_coord)[spaceID]; + coordList.push_back(coord); +} void TargetPhrase::SetProperties(const StringPiece &str) { diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h index a2772cdae..f0b312628 100644 --- a/moses/TargetPhrase.h +++ b/moses/TargetPhrase.h @@ -56,9 +56,16 @@ public: Scores const* GetExtraScores(FeatureFunction const* 
ff) const; void SetExtraScores(FeatureFunction const* ff,boost::shared_ptr<Scores> const& scores); + typedef std::map<size_t const, std::vector<std::vector<float> const*> > CoordCache_t; + std::vector<std::vector<float> const*> const& GetCoordList(size_t const spaceID) const; + void PushCoord(size_t const spaceID, std::vector<float> const* coord); private: ScoreCache_t m_cached_scores; + // The coordinate cache stores vectors of pointers to vectors. The coordinate + // vectors referenced by the pointers should be owned by the phrase dictionary + // implementation. + SPTR<CoordCache_t> m_cached_coord; WPTR<ContextScope> m_scope; private: diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h index bc53221e9..33a128638 100644 --- a/moses/TranslationModel/PhraseDictionary.h +++ b/moses/TranslationModel/PhraseDictionary.h @@ -147,14 +147,6 @@ public: void SetParameter(const std::string& key, const std::string& value); - void AddKnownSpace(const std::string& name) { - m_knownSpaces.push_back(name); - } - - const std::vector<std::string> &GetKnownSpaces() const { - return m_knownSpaces; - } - // LEGACY //! find list of translations that can translates a portion of src. 
Used by confusion network decoding virtual @@ -179,9 +171,6 @@ protected: // cache size_t m_maxCacheSize; // 0 = no caching - // Named coordinate spaces used by this model, in order (see "coord" XML tag) - std::vector<std::string> m_knownSpaces; - #ifdef WITH_THREADS //reader-writer lock mutable boost::thread_specific_ptr<CacheColl> m_cache; diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 634cdc539..a8b577845 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -286,16 +286,17 @@ namespace Moses BOOST_FOREACH(std::string instance, coord_instances) { vector<string> toks = Moses::Tokenize(instance, ":"); - string name = toks[0]; + string space = toks[0]; string file = toks[1]; - //TODO: register this space for this model + // Register that this model uses the given space + m_coord_spaces.push_back(StaticData::InstanceNonConst().MapCoordSpace(space)); // Load sid coordinates from file m_sid_coord_list.push_back(vector<vector<float> >()); vector<vector<float> >& sid_coord = m_sid_coord_list[m_sid_coord_list.size() - 1]; //TODO: support extra data for btdyn, here? extra? 
sid_coord.reserve(btfix->T1->size()); string line; - cerr << "Loading coordinate lines for space \"" << name << "\" from " << file << endl; + cerr << "Loading coordinate lines for space \"" << space << "\" from " << file << endl; iostreams::filtering_istream in; ugdiss::open_input_stream(file, in); while(getline(in, line)) @@ -648,19 +649,27 @@ namespace Moses } #endif - // Track stats for rescoring non-cacheable phrases as needed + // Track coordinates if requested if (m_track_coord) { - cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1) << endl; BOOST_FOREACH(uint32_t const sid, *pool.sids) { - BOOST_FOREACH(vector<vector<float> > coord, m_sid_coord_list) + for(size_t i = 0; i < m_coord_spaces.size(); ++i) { - //TODO: store coord[sid] in tp - cerr << " : " << Join(" ", coord[sid]); + tp->PushCoord(m_coord_spaces[i], &m_sid_coord_list[i][sid]); } - cerr << endl; } + /* + cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1); + BOOST_FOREACH(size_t id, m_coord_spaces) + { + cerr << " [" << id << "]"; + vector<vector<float> const*> const& coordList = tp->GetCoordList(id); + BOOST_FOREACH(vector<float> const* coord, coordList) + cerr << " : " << Join(" ", *coord); + } + cerr << endl; + */ } return tp; diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index a26e4fa2e..da7a1ef5a 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -119,8 +119,10 @@ namespace Moses std::vector<SPTR<pscorer > > m_active_ff_common; // activated feature functions (dyn) - bool m_track_coord = false; // track coordinates? Effectively: track sids when sampling bitext? + bool m_track_coord = false; // track coordinates? 
Track sids when sampling + // from bitext, append coords to target phrases std::vector<std::vector<std::vector<float> > > m_sid_coord_list; + std::vector<size_t> m_coord_spaces; void parse_factor_spec(std::vector<FactorType>& flist, std::string const key); diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp index 3e367aa9e..b7969ae51 100644 --- a/moses/XmlOption.cpp +++ b/moses/XmlOption.cpp @@ -405,33 +405,21 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, // Coord: coordinates of the input sentence in a user-defined space // <coord space="NAME" coord="X Y Z ..." /> // where NAME is the name of the space and X Y Z ... are floats. See - // PScoreDist in PhraseDictionaryBitextSampling (Mmsapt) for an example - // of using this information for feature scoring. + // TODO for an example of using this information for feature scoring. else if (tagName == "coord") { // Parse tag string space = ParseXmlTagAttribute(tagContent, "space"); - vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent, "coord")); - boost::shared_ptr<vector<float> > coord(new vector<float>); - Scan<float>(*coord, toks); - // Init if needed - if (!input.m_pd2InputCoord) { - input.m_pd2InputCoord.reset(new std::map<PhraseDictionary const*, std::vector<boost::shared_ptr<std::vector<float> > > >); - } - // Scan phrase dictionaries to see which (if any) use this space - BOOST_FOREACH(PhraseDictionary const* pd, PhraseDictionary::GetColl()) { - const vector<string>& pdKnownSpaces = pd->GetKnownSpaces(); - for (size_t i = 0; i < pdKnownSpaces.size(); ++i) { - // Match - if (pdKnownSpaces[i] == space) { - // Make sure a slot to store the coordinates exists - std::vector<boost::shared_ptr<std::vector<float> > >& inputCoord = (*input.m_pd2InputCoord)[pd]; - if (inputCoord.size() < i + 1) { - inputCoord.resize(i + 1); - } - // Store - inputCoord[i] = coord; - } + vector<string> tok = Tokenize(ParseXmlTagAttribute(tagContent, "coord")); + size_t id = 
StaticData::Instance().GetCoordSpace(space); + if (!id) { + TRACE_ERR("ERROR: no models use space " << space << ", will be ignored" << endl); + } else { + // Init if needed + if (!input.m_coordMap) { + input.m_coordMap.reset(new std::map<size_t const, std::vector<float> >); } + vector<float>& coord = (*input.m_coordMap)[id]; + Scan<float>(coord, tok); } } |