diff options
author | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-03-28 17:41:08 +0300 |
---|---|---|
committer | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-03-28 17:41:08 +0300 |
commit | 1b23edf62f4f390fc0d7c155d2b27158362f8f3d (patch) | |
tree | aa0222bd7efa50760fe51e59a5687aeccff4e615 | |
parent | c7ad2571f5c574eb5d854a9a1847c6d633412f07 (diff) |
Cache for the N most recently used TargetPhraseCollections. Refactored out of mmsapt.h.
-rw-r--r-- | moses/TranslationModel/UG/TargetPhraseCollectionCache.cc | 156 | ||||
-rw-r--r-- | moses/TranslationModel/UG/TargetPhraseCollectionCache.h | 62 |
2 files changed, 218 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc new file mode 100644 index 000000000..4db92fb51 --- /dev/null +++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc @@ -0,0 +1,156 @@ +#include "TargetPhraseCollectionCache.h" + +namespace Moses +{ + +#if defined(timespec) + bool operator<(timespec const& a, timespec const& b) + { + if (a.tv_sec != b.tv_sec) return a.tv_sec < b.tv_sec; + return (a.tv_nsec < b.tv_nsec); + } + + bool operator>=(timespec const& a, timespec const& b) + { + if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec; + return (a.tv_nsec >= b.tv_nsec); + } +#endif + + bool operator<(timeval const& a, timeval const& b) + { + if (a.tv_sec != b.tv_sec) return a.tv_sec < b.tv_sec; + return (a.tv_usec < b.tv_usec); + } + + bool operator>=(timeval const& a, timeval const& b) + { + if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec; + return (a.tv_usec >= b.tv_usec); + } + + void + bubble_up(std::vector<TPCollWrapper*>& v, size_t k) + { + if (k >= v.size()) return; + for (;k && (v[k]->tstamp < v[k/2]->tstamp); k /=2) + { + std::swap(v[k],v[k/2]); + std::swap(v[k]->idx,v[k/2]->idx); + } + } + + void + bubble_down(std::vector<TPCollWrapper*>& v, size_t k) + { + for (size_t j = 2*(k+1); j <= v.size(); j = 2*((k=j)+1)) + { + if (j == v.size() || (v[j-1]->tstamp < v[j]->tstamp)) --j; + if (v[j]->tstamp >= v[k]->tstamp) break; + std::swap(v[k],v[j]); + v[k]->idx = k; + v[j]->idx = j; + } + } + + TPCollWrapper* + TPCollCache + ::encache(TPCollWrapper* const ptr) + { + using namespace boost; + // update time stamp: +#if defined(timespec) + clock_gettime(CLOCK_MONOTONIC, &ptr->tstamp); +#else + gettimeofday(&ptr->tstamp, NULL); +#endif + unique_lock<shared_mutex> lock(m_history_lock); + if (m_history.capacity() > 1) + { + vector<TPCollWrapper*>& v = m_history; + if (ptr->idx >= 0) // ptr is already in history + { + assert(ptr == v[ptr->idx]); + size_t k = 2 * (ptr->idx + 1); + if (k < v.size()) bubble_up(v,k--); + if (k < v.size()) bubble_up(v,k); + } + else if (v.size() < v.capacity()) + { + size_t k = ptr->idx = v.size(); + v.push_back(ptr); + bubble_up(v,k); + } + else // someone else needs to go + { + v[0]->idx = -1; + decache(v[0]); + v[0] = ptr; + bubble_down(v,0); + } + } + return ptr; + } // TPCollCache::encache(...) + + TPCollWrapper* + TPCollCache + ::get(uint64_t key, size_t revision) const + { + using namespace boost; + { + shared_lock<shared_mutex> lock(m_cache_lock); + cache_t::iterator m = m_cache.find(key); + if (m == m_cache.end() || m_cache->second->revision != revision) + return NULL; + ++m->second->refCount; + } + encache(m->second,lock); + return NULL; + } // TPCollCache::get(...) + + void + TPCollCache + ::add(uint64_t key, TPCollWrapper* ptr) const + { + { + boost::unique_lock<boost::shared_mutex> lock(m_cache_lock); + m_cache[key] = ptr; + ++ptr->refCount; + ++m_tpc_ctr; + } + encache(ptr,lock); + } // TPCollCache::add(...) + + void + TPCollCache + ::release(TargetPhraseCollection const*& tpc) + { + if (!tpc) return; + TPCollWrapper* ptr = (reinterpret_cast<TPCollWrapper*> + (const_cast<TargetPhraseCollection*>(tpc))); + + if (--ptr->refCount || ptr->idx >= 0) return; // tpc is still in use + +#if 0 + timespec t; clock_gettime(CLOCK_MONOTONIC,&t); + timespec r; clock_getres(CLOCK_MONOTONIC,&r); + float delta = t.tv_sec - ptr->tstamp.tv_sec; + cerr << "deleting old cache entry after " << delta << " seconds." + << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec + << " at " << __FILE__ << ":" << __LINE__ << endl; +#endif + + boost::upgrade_lock<boost::shared_mutex> lock(m_cache_lock); + cache_t::iterator m = m_cache.find(ptr->key); + if (m != m_cache.end() && m->second == ptr) + { // the cache could have been updated with a new pointer + // for the same phrase already, so we need to check + // if the pointer we cound is the one we want to get rid of, + // hence the second check + boost::upgrade_to_unique_lock<boost::shared_mutex>(lock); + m_cache.erase(m); + } + delete ptr; + ptr = NULL; + } // TPCollCache::release(...) +} // namespace diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h new file mode 100644 index 000000000..7ae167138 --- /dev/null +++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h @@ -0,0 +1,62 @@ +// -*- c++ -*- +#pragma once +#include <time.h> +#include "moses/TargetPhraseCollection.h" + +namespace Moses +{ + class TPCollWrapper + // wrapper around TargetPhraseCollection that includes reference counts + // and a time stamp for least-recently-used caching of TargetPhraseCollection-s + : public TargetPhraseCollection + { + public: + size_t const revision; + // revison; gets changed when the underlying corpus in Mmsapt is updated + + uint64_t const key; // phrase key + uint32_t refCount; // reference count +#if defined(timespec) // timespec is better, but not available everywhere + timespec tstamp; // last use +#else + timeval tstamp; // last use +#endif + int idx; // position in the history heap + TPCollWrapper(size_t r, uint64_t const k); + ~TPCollWrapper(); + }; + + class TPCollCache + { + typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t; + typedef std::vector<TPCollWrapper*> history_t; + cache_t m_cache; // maps from phrase ids to target phrase collections + history_t m_history; // heap of live items, least recently used one on top + + boost::shared_mutex m_cache_lock; // locks m_cache + boost::shared_mutex m_history_lock; // locks m_history + +#if 0 + // mutable size_t m_tpc_ctr; + // counter of all live item, for debugging. probably obsolete; was used + // to track memory leaks +#endif + + TPCollWrapper* encache(TPCollWrapper* const ptr); + // updates time stamp and position in least-recently-used heap m_history + + public: + // TPCollCache() : m_tpc_ctr(0) { } + + TPCollWrapper* + get(uint64_t key, size_t revision) const; + + void + add(uint64_t key, TPCollWrapper* ptr); + + void + release(TargetPhraseCollection const*& tpc); + }; + + +} |