diff options
Diffstat (limited to 'contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h')
-rw-r--r-- | contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h new file mode 100644 index 000000000..3a9e6f170 --- /dev/null +++ b/contrib/moses2/TranslationModel/CompactPT/TargetPhraseCollectionCache.h @@ -0,0 +1,176 @@ +// $Id$ +// vim:tabstop=2 +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#pragma once + +#include <map> +#include <set> +#include <vector> + +#include <boost/thread/tss.hpp> +#include <boost/shared_ptr.hpp> + +#include "../../Word.h" +#include "../../Phrase.h" + +namespace Moses2 +{ +typedef std::pair<size_t, size_t> AlignPointSizeT; + +struct PhraseCompact : public std::vector<Word> +{ +public: + PhraseCompact(const Phrase<Word> ©); +}; + +struct TPCompact +{ + std::vector<Word> words; + std::set<AlignPointSizeT> alignment; + std::vector<float> scores; + +}; + +// Avoid using new due to locking +typedef std::vector<TPCompact> TargetPhraseVector; +typedef boost::shared_ptr<TargetPhraseVector> TargetPhraseVectorPtr; + +/** Implementation of Persistent Cache **/ +class TargetPhraseCollectionCache +{ +private: + size_t m_max; + float m_tolerance; + + struct LastUsed { + clock_t m_clock; + TargetPhraseVectorPtr m_tpv; + size_t m_bitsLeft; + + LastUsed() : m_clock(0), m_bitsLeft(0) {} + + LastUsed(clock_t clock, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0) + : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {} + }; + + typedef std::map<PhraseCompact, LastUsed> CacheMap; + static boost::thread_specific_ptr<CacheMap> m_phraseCache; + +public: + + typedef CacheMap::iterator iterator; + typedef CacheMap::const_iterator const_iterator; + + TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2) + : m_max(max), m_tolerance(tolerance) { + } + + iterator Begin() { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->begin(); + } + + const_iterator Begin() const { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->begin(); + } + + iterator End() { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->end(); + } + + const_iterator End() const { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + return m_phraseCache->end(); + } + + /** retrieve translations for source phrase from persistent cache **/ + void Cache(const Phrase<Word> &sourcePhrase, TargetPhraseVectorPtr tpv, + size_t bitsLeft = 0, size_t maxRank = 0) { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + // check if source phrase is already in cache + iterator it = m_phraseCache->find(sourcePhrase); + if(it != m_phraseCache->end()) + // if found, just update clock + it->second.m_clock = clock(); + else { + // else, add to cache + if(maxRank && tpv->size() > maxRank) { + TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector()); + tpv_temp->resize(maxRank); + std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin()); + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft); + } else + (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft); + } + } + + std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase<Word> &sourcePhrase) { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + iterator it = m_phraseCache->find(sourcePhrase); + if(it != m_phraseCache->end()) { + LastUsed &lu = it->second; + lu.m_clock = clock(); + return std::make_pair(lu.m_tpv, lu.m_bitsLeft); + } else + return std::make_pair(TargetPhraseVectorPtr(), 0); + } + + // if cache full, reduce + void Prune() { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + if(m_phraseCache->size() > m_max * (1 + m_tolerance)) { + typedef std::set<std::pair<clock_t, PhraseCompact > > Cands; + Cands cands; + for(CacheMap::iterator it = m_phraseCache->begin(); + it != m_phraseCache->end(); it++) { + LastUsed &lu = it->second; + cands.insert(std::make_pair(lu.m_clock, it->first)); + } + + for(Cands::iterator it = cands.begin(); it != cands.end(); it++) { + const PhraseCompact& p = it->second; + m_phraseCache->erase(p); + + if(m_phraseCache->size() < (m_max * (1 - m_tolerance))) + break; + } + } + } + + void CleanUp() { + if(!m_phraseCache.get()) + m_phraseCache.reset(new CacheMap()); + m_phraseCache->clear(); + } + +}; + +} + |