diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2016-09-30 18:01:54 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2016-09-30 18:01:54 +0300 |
commit | 792b25cbbd404d49298c25a43d7fdb4c4cdd481f (patch) | |
tree | 78baf2cfdeb9f1d18519a7488d144fc417c70c04 | |
parent | 9bd727f1d537e25ed44bf0a42ebc6fb653b34d25 (diff) |
redo import of PhraseDecoder and TargetPhraseCollectionCache. Use custom classes for phrases and target phrases
4 files changed, 63 insertions, 65 deletions
diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp index 8930bf94b..4ff4375cd 100644 --- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp +++ b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.cpp @@ -22,9 +22,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include <deque> #include "PhraseDecoder.h" -#include "moses/StaticData.h" - -#include "../../ManagerBase.h" +#include "../../System.h" +#include "../../SubPhrase.h" using namespace std; @@ -248,12 +247,12 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection( } TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( - const ManagerBase &mgr, - TargetPhraseVectorPtr tpv, - BitWrapper<> &encodedBitStream, - const Phrase<Word> &sourcePhrase, - bool topLevel, - bool eval) + const ManagerBase &mgr, + TargetPhraseVectorPtr tpv, + BitWrapper<> &encodedBitStream, + const Phrase<Word> &sourcePhrase, + bool topLevel, + bool eval) { const System &system = mgr.system; FactorCollection &vocab = system.GetVocab(); @@ -274,19 +273,23 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( unsigned phraseStopSymbol = 0; AlignPoint alignStopSymbol(-1, -1); - TPCompact tpCompact; + std::vector<float> scores; + std::set<AlignPointSizeT> alignment; enum DecodeState { New, Symbol, Score, Alignment, Add } state = New; size_t srcSize = sourcePhrase.GetSize(); + TPCompact* targetPhrase = NULL; while(encodedBitStream.TellFromEnd()) { if(state == New) { // Creating new TargetPhrase on the heap - tpCompact.words.clear(); - tpCompact.alignment.clear(); - tpCompact.scores.clear(); + tpv->push_back(TPCompact()); + targetPhrase = &tpv->back(); + + alignment.clear(); + scores.clear(); state = Symbol; } @@ -312,12 +315,12 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); if(m_phraseDictionary.m_useAlignmentInfo) 
{ - size_t trgPos = tpCompact.words.size(); - tpCompact.alignment.insert(AlignPoint(srcPos, trgPos)); + size_t trgPos = targetPhrase->words.size(); + alignment.insert(AlignPoint(srcPos, trgPos)); } } else if(type == 3) { size_t rank = DecodeREncSymbol3(symbol); - size_t srcPos = tpCompact.words.size(); + size_t srcPos = targetPhrase->words.size(); if(srcPos >= sourceWords.size()) return TargetPhraseVectorPtr(); @@ -325,13 +328,13 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); if(m_phraseDictionary.m_useAlignmentInfo) { size_t trgPos = srcPos; - tpCompact.alignment.insert(AlignPoint(srcPos, trgPos)); + alignment.insert(AlignPoint(srcPos, trgPos)); } } Word word; word.CreateFromString(vocab, system, wordString); - tpCompact.words.push_back(word); + targetPhrase->words.push_back(word); } else if(m_coding == PREnc) { // if the symbol is just a word if(GetPREncType(symbol) == 1) { @@ -339,7 +342,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( Word word; word.CreateFromString(vocab, system, GetTargetSymbol(decodedSymbol)); - tpCompact.words.push_back(word); + targetPhrase->words.push_back(word); } // if the symbol is a subphrase pointer else { @@ -347,7 +350,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( int right = DecodePREncSymbol2Right(symbol); unsigned rank = DecodePREncSymbol2Rank(symbol); - int srcStart = left + tpCompact.words.size(); + int srcStart = left + targetPhrase->words.size(); int srcEnd = srcSize - right - 1; // false positive consistency check @@ -374,34 +377,34 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( // false positive consistency check if(subTpv != NULL && rank < subTpv->size()) { // insert the subphrase into the main target phrase - const TPCompact& subTp = subTpv->at(rank); + TPCompact& subTp = subTpv->at(rank); if(m_phraseDictionary.m_useAlignmentInfo) { // reconstruct the alignment data based on the alignment of the 
subphrase for(std::set<AlignPointSizeT>::const_iterator it = subTp.alignment.begin(); it != subTp.alignment.end(); it++) { - tpCompact.alignment.insert(AlignPointSizeT(srcStart + it->first, - tpCompact.words.size() + it->second)); + alignment.insert(AlignPointSizeT(srcStart + it->first, + targetPhrase->words.size() + it->second)); } } - for (size_t i = 0; i < subTp.words.size(); ++i) { - tpCompact.words.push_back(subTp.words[i]); - } + std::copy(subTp.words.begin(), subTp.words.end(), std::back_inserter(targetPhrase->words)); } else return TargetPhraseVectorPtr(); } } else { Word word; word.CreateFromString(vocab, system, GetTargetSymbol(symbol)); - tpCompact.words.push_back(word); + targetPhrase->words.push_back(word); } } } else if(state == Score) { - size_t idx = m_multipleScoreTrees ? tpCompact.scores.size() : 0; + size_t idx = m_multipleScoreTrees ? scores.size() : 0; float score = m_scoreTrees[idx]->Read(encodedBitStream); - tpCompact.scores.push_back(score); + scores.push_back(score); + + if(scores.size() == m_numScoreComponent) { + targetPhrase->scores = scores; - if(tpCompact.scores.size() == m_numScoreComponent) { if(m_containsAlignmentInfo) state = Alignment; else @@ -413,29 +416,19 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( state = Add; } else { if(m_phraseDictionary.m_useAlignmentInfo) - tpCompact.alignment.insert(AlignPointSizeT(alignPoint)); + alignment.insert(AlignPointSizeT(alignPoint)); } } if(state == Add) { - size_t targetSize = tpCompact.words.size(); - TargetPhraseImpl *targetPhrase = new (mgr.GetPool().Allocate<TargetPhraseImpl>()) TargetPhraseImpl(mgr.GetPool(), m_phraseDictionary, system, targetSize); - - for (size_t i = 0; i < tpCompact.words.size(); ++i) { - (*targetPhrase)[i] = tpCompact.words[i]; - } - if(m_phraseDictionary.m_useAlignmentInfo) { size_t sourceSize = sourcePhrase.GetSize(); - for(std::set<AlignPointSizeT>::iterator it = tpCompact.alignment.begin(); it != tpCompact.alignment.end(); it++) { + size_t 
targetSize = targetPhrase->words.size(); + for(std::set<AlignPointSizeT>::iterator it = alignment.begin(); it != alignment.end(); it++) { if(it->first >= sourceSize || it->second >= targetSize) return TargetPhraseVectorPtr(); } - targetPhrase->SetAlignTerm(tpCompact.alignment); - } - - if(eval) { - mgr.system.featureFunctions.EvaluateInIsolation(mgr.GetPool(), mgr.system, sourcePhrase, *targetPhrase); + targetPhrase->alignment = alignment; } if(m_coding == PREnc) { diff --git a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h index 01a7c23c5..79faa38a6 100644 --- a/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h +++ b/contrib/moses2/TranslationModel/CompactPT/PhraseDecoder.h @@ -30,19 +30,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include <algorithm> #include <sys/stat.h> -#include "moses/TypeDef.h" -#include "moses/FactorCollection.h" -#include "moses/Word.h" -#include "moses/Util.h" -#include "moses/InputFileStream.h" -#include "moses/StaticData.h" -#include "moses/Range.h" - #include "PhraseTableCompact.h" #include "StringVector.h" #include "CanonicalHuffman.h" #include "TargetPhraseCollectionCache.h" +#include "../../Phrase.h" +#include "../../ManagerBase.h" + namespace Moses2 { @@ -116,7 +111,7 @@ protected: public: PhraseDecoder( - PhraseTableCompact &phraseDictionary, + PhraseTableCompact &phraseDictionary, const std::vector<FactorType>* input, const std::vector<FactorType>* output, size_t numScoreComponent diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp index 5f1caad78..07d0469e0 100644 --- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp +++ b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp @@ -24,9 +24,16 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
02110-1301 USA namespace Moses2 { - boost::thread_specific_ptr<TargetPhraseCollectionCache::CacheMap> TargetPhraseCollectionCache::m_phraseCache; +PhraseCompact::PhraseCompact(const Phrase<Word> &copy) +{ + for (size_t i = 0; i < copy.GetSize(); ++i) { + const Word &word = copy[i]; + push_back(word); + } +} + } diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h index 601ac1bbe..3a9e6f170 100644 --- a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h +++ b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h @@ -28,13 +28,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include <boost/thread/tss.hpp> #include <boost/shared_ptr.hpp> -#include "../../PhraseBased/TargetPhraseImpl.h" +#include "../../Word.h" #include "../../Phrase.h" namespace Moses2 { typedef std::pair<size_t, size_t> AlignPointSizeT; +struct PhraseCompact : public std::vector<Word> +{ +public: + PhraseCompact(const Phrase<Word> &copy); +}; + struct TPCompact { std::vector<Word> words; @@ -65,10 +71,7 @@ private: : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {} }; - typedef boost::unordered_map< - const Phrase<Word>*, - LastUsed, UnorderedComparer< Phrase<Word> >, - UnorderedComparer< Phrase<Word> > > CacheMap; + typedef std::map<PhraseCompact, LastUsed> CacheMap; static boost::thread_specific_ptr<CacheMap> m_phraseCache; public: @@ -110,7 +113,7 @@ public: if(!m_phraseCache.get()) m_phraseCache.reset(new CacheMap()); // check if source phrase is already in cache - iterator it = m_phraseCache->find(&sourcePhrase); + iterator it = m_phraseCache->find(sourcePhrase); if(it != m_phraseCache->end()) // if found, just update clock it->second.m_clock = clock(); @@ -120,16 +123,16 @@ public: TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector()); tpv_temp->resize(maxRank); std::copy(tpv->begin(), tpv->begin() + maxRank, 
tpv_temp->begin()); - (*m_phraseCache)[&sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft); + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft); } else - (*m_phraseCache)[&sourcePhrase] = LastUsed(clock(), tpv, bitsLeft); + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft); } } std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase<Word> &sourcePhrase) { if(!m_phraseCache.get()) m_phraseCache.reset(new CacheMap()); - iterator it = m_phraseCache->find(&sourcePhrase); + iterator it = m_phraseCache->find(sourcePhrase); if(it != m_phraseCache->end()) { LastUsed &lu = it->second; lu.m_clock = clock(); @@ -143,7 +146,7 @@ public: if(!m_phraseCache.get()) m_phraseCache.reset(new CacheMap()); if(m_phraseCache->size() > m_max * (1 + m_tolerance)) { - typedef boost::unordered_set<std::pair<clock_t, const Phrase<Word>*> > Cands; + typedef std::set<std::pair<clock_t, PhraseCompact > > Cands; Cands cands; for(CacheMap::iterator it = m_phraseCache->begin(); it != m_phraseCache->end(); it++) { @@ -152,7 +155,7 @@ public: } for(Cands::iterator it = cands.begin(); it != cands.end(); it++) { - const Phrase<Word> *p = it->second; + const PhraseCompact& p = it->second; m_phraseCache->erase(p); if(m_phraseCache->size() < (m_max * (1 - m_tolerance))) |