Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorUlrich Germann <Ulrich.Germann@gmail.com>2015-03-28 17:41:08 +0300
committerUlrich Germann <Ulrich.Germann@gmail.com>2015-03-28 17:41:08 +0300
commit1b23edf62f4f390fc0d7c155d2b27158362f8f3d (patch)
treeaa0222bd7efa50760fe51e59a5687aeccff4e615 /moses
parentc7ad2571f5c574eb5d854a9a1847c6d633412f07 (diff)
Cache for the N most recently used TargetPhraseCollections. Refactored out of mmsapt.h.
Diffstat (limited to 'moses')
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.cc156
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.h62
2 files changed, 218 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
new file mode 100644
index 000000000..4db92fb51
--- /dev/null
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
@@ -0,0 +1,156 @@
+#include "TargetPhraseCollectionCache.h"
+
+namespace Moses
+{
+
+#if defined(timespec)
+ // Strict "earlier than" ordering of two timespec values: whole seconds
+ // decide first, nanoseconds only break ties within the same second.
+ bool operator<(timespec const& a, timespec const& b)
+ {
+   return (a.tv_sec == b.tv_sec
+           ? a.tv_nsec < b.tv_nsec
+           : a.tv_sec < b.tv_sec);
+ }
+
+ // "Not earlier than" ordering of two timespec values; the exact
+ // complement of the strict ordering on (tv_sec, tv_nsec).
+ bool operator>=(timespec const& a, timespec const& b)
+ {
+   bool const same_second = (a.tv_sec == b.tv_sec);
+   if (same_second)
+     return a.tv_nsec >= b.tv_nsec;
+   return a.tv_sec > b.tv_sec;
+ }
+#endif
+
+ // Strict "earlier than" ordering of two timeval values: whole seconds
+ // decide first, microseconds only break ties within the same second.
+ bool operator<(timeval const& a, timeval const& b)
+ {
+   bool const same_second = (a.tv_sec == b.tv_sec);
+   return same_second ? (a.tv_usec < b.tv_usec)
+                      : (a.tv_sec < b.tv_sec);
+ }
+
+ // "Not earlier than" ordering of two timeval values; the exact
+ // complement of the strict ordering on (tv_sec, tv_usec).
+ bool operator>=(timeval const& a, timeval const& b)
+ {
+   if (a.tv_sec == b.tv_sec)
+     {
+       return a.tv_usec >= b.tv_usec;
+     }
+   return a.tv_sec > b.tv_sec;
+ }
+
+ // Moves the element at position k of the min-heap v (ordered by tstamp,
+ // oldest entry at index 0) towards the root until its parent is no
+ // longer younger than it, keeping each wrapper's idx in sync with its
+ // slot. Does nothing if k is out of range.
+ //
+ // The heap is 0-based with children at 2k+1 and 2k+2 (see bubble_down),
+ // so the parent of k is (k-1)/2. Using k/2 would compare even-numbered
+ // nodes against the wrong element and corrupt the heap order.
+ void
+ bubble_up(std::vector<TPCollWrapper*>& v, size_t k)
+ {
+   if (k >= v.size()) return;
+   while (k > 0)
+     {
+       size_t const parent = (k - 1) / 2;
+       if (!(v[k]->tstamp < v[parent]->tstamp)) break; // heap order holds
+       std::swap(v[k], v[parent]);
+       v[k]->idx = k;
+       v[parent]->idx = parent;
+       k = parent;
+     }
+ }
+
+ // Sinks the element at position k of the min-heap v (ordered by tstamp)
+ // towards the leaves: as long as its older child is strictly older than
+ // the element itself, the two trade places and their idx fields are
+ // updated to match their new slots.
+ void
+ bubble_down(std::vector<TPCollWrapper*>& v, size_t k)
+ {
+   size_t const n = v.size();
+   for (;;)
+     {
+       size_t const left = 2*k + 1;
+       if (left >= n) break; // k is a leaf
+       size_t const right = left + 1;
+       // pick the child with the older time stamp; on a tie take the right one
+       size_t child = right;
+       if (right == n || (v[left]->tstamp < v[right]->tstamp)) child = left;
+       if (v[child]->tstamp >= v[k]->tstamp) break; // heap order holds
+       std::swap(v[k], v[child]);
+       v[k]->idx = k;
+       v[child]->idx = child;
+       k = child;
+     }
+ }
+
+ // Marks *ptr as just used: refreshes its time stamp and (re-)positions
+ // it in the least-recently-used heap m_history (oldest entry at index 0).
+ // If ptr is not yet in the heap it is appended while there is spare
+ // capacity; otherwise the oldest entry is evicted and ptr takes its place.
+ // Nothing is tracked unless m_history has a capacity greater than 1.
+ // Returns ptr.
+ TPCollWrapper*
+ TPCollCache
+ ::encache(TPCollWrapper* const ptr)
+ {
+   using namespace boost;
+   unique_lock<shared_mutex> lock(m_history_lock);
+   // The time stamp is the heap key, so it is only changed while the
+   // history lock is held.
+#if defined(timespec)
+   clock_gettime(CLOCK_MONOTONIC, &ptr->tstamp);
+#else
+   gettimeofday(&ptr->tstamp, NULL);
+#endif
+   if (m_history.capacity() > 1)
+     {
+       std::vector<TPCollWrapper*>& v = m_history;
+       if (ptr->idx >= 0) // ptr is already in history
+         {
+           assert(ptr == v[ptr->idx]);
+           // The time stamp just got younger, so in a heap with the oldest
+           // entry on top the element can only need to move towards the
+           // leaves.
+           bubble_down(v, ptr->idx);
+         }
+       else if (v.size() < v.capacity())
+         {
+           size_t k = ptr->idx = v.size();
+           v.push_back(ptr);
+           bubble_up(v,k);
+         }
+       else // someone else needs to go
+         {
+           v[0]->idx = -1;
+           // NOTE(review): decache() is not declared in the visible part of
+           // the class; presumably it disposes of the evicted entry - confirm.
+           decache(v[0]);
+           v[0] = ptr;
+           ptr->idx = 0; // stays correct even if bubble_down does not move ptr
+           bubble_down(v,0);
+         }
+     }
+   return ptr;
+ } // TPCollCache::encache(...)
+
+ // Looks up the collection cached under key. Returns NULL if there is no
+ // entry for key or if the cached entry belongs to a different revision.
+ // On a hit the entry's reference count is incremented, the entry is
+ // marked as most recently used via encache(), and it is returned.
+ TPCollWrapper*
+ TPCollCache
+ ::get(uint64_t key, size_t revision) const
+ {
+   using namespace boost;
+   // get() is declared const, but it has to take locks and update the
+   // usage history; route those accesses through a non-const alias.
+   TPCollCache* const self = const_cast<TPCollCache*>(this);
+   TPCollWrapper* hit = NULL;
+   {
+     shared_lock<shared_mutex> lock(self->m_cache_lock);
+     cache_t::iterator m = self->m_cache.find(key);
+     if (m == self->m_cache.end() || m->second->revision != revision)
+       return NULL;
+     hit = m->second;
+     // NOTE(review): refCount is a plain integer incremented under a
+     // shared (reader) lock - confirm this is safe for concurrent callers.
+     ++hit->refCount;
+   } // the cache lock is released before the history lock is taken
+   return self->encache(hit);
+ } // TPCollCache::get(...)
+
+ // Registers ptr in the cache under key (replacing whatever pointer was
+ // stored there before), takes one reference on it, and marks it as
+ // most recently used via encache().
+ void
+ TPCollCache
+ ::add(uint64_t key, TPCollWrapper* ptr)
+ {
+   {
+     boost::unique_lock<boost::shared_mutex> lock(m_cache_lock);
+     m_cache[key] = ptr;
+     ++ptr->refCount;
+   } // the cache lock is released before the history lock is taken
+   encache(ptr);
+ } // TPCollCache::add(...)
+
+ // Gives back one reference to a collection previously obtained from the
+ // cache. A NULL tpc is ignored. If other references remain, or the
+ // wrapper still sits in the usage history (idx >= 0), nothing else
+ // happens. Otherwise the wrapper is removed from the cache map (if the
+ // map still points at this very object), deleted, and the caller's
+ // pointer is reset to NULL so that it does not dangle.
+ void
+ TPCollCache
+ ::release(TargetPhraseCollection const*& tpc)
+ {
+   if (!tpc) return;
+   // TPCollWrapper publicly derives from TargetPhraseCollection, so a
+   // static_cast is the appropriate downcast here.
+   TPCollWrapper* ptr = (static_cast<TPCollWrapper*>
+                         (const_cast<TargetPhraseCollection*>(tpc)));
+
+   if (--ptr->refCount || ptr->idx >= 0) return; // tpc is still in use
+
+#if 0
+   timespec t; clock_gettime(CLOCK_MONOTONIC,&t);
+   timespec r; clock_getres(CLOCK_MONOTONIC,&r);
+   float delta = t.tv_sec - ptr->tstamp.tv_sec;
+   cerr << "deleting old cache entry after " << delta << " seconds."
+   << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec
+   << " at " << __FILE__ << ":" << __LINE__ << endl;
+#endif
+
+   boost::upgrade_lock<boost::shared_mutex> lock(m_cache_lock);
+   cache_t::iterator m = m_cache.find(ptr->key);
+   if (m != m_cache.end() && m->second == ptr)
+     { // the cache could have been updated with a new pointer
+       // for the same phrase already, so we need to check
+       // if the pointer we found is the one we want to get rid of,
+       // hence the second check
+       // The upgrade must be a named object: it has to stay alive for the
+       // duration of the erase to provide exclusive access.
+       boost::upgrade_to_unique_lock<boost::shared_mutex> exclusive(lock);
+       m_cache.erase(m);
+     }
+   delete ptr;
+   tpc = NULL; // the object is gone; do not leave the caller's pointer dangling
+ } // TPCollCache::release(...)
+} // namespace
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
new file mode 100644
index 000000000..7ae167138
--- /dev/null
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
@@ -0,0 +1,62 @@
+// -*- c++ -*-
+#pragma once
+#include <time.h>
+#include "moses/TargetPhraseCollection.h"
+
+namespace Moses
+{
+ class TPCollWrapper
+ // Wrapper around TargetPhraseCollection that adds the bookkeeping needed for
+ // least-recently-used caching of TargetPhraseCollection-s: a reference count, a time stamp and a heap position.
+ : public TargetPhraseCollection
+ {
+ public:
+ size_t const revision;
+ // revision; gets changed when the underlying corpus in Mmsapt is updated
+
+ uint64_t const key; // phrase key; TPCollCache::release() uses it to find this entry in the cache map
+ uint32_t refCount; // reference count; incremented by TPCollCache::get()/add(), decremented by release()
+#if defined(timespec) // timespec is better, but not available everywhere. NOTE(review): timespec is a type name, not a macro, so this test looks like it can never be true - confirm
+ timespec tstamp; // last use
+#else
+ timeval tstamp; // last use
+#endif
+ int idx; // position in the history heap; a negative value means "not in the heap" (encache() tests idx >= 0 and sets -1 on eviction)
+ TPCollWrapper(size_t r, uint64_t const k); // presumably r initialises revision and k initialises key; the definition is not shown here - confirm
+ ~TPCollWrapper();
+ };
+
+ class TPCollCache // cache of TPCollWrapper objects indexed by phrase key, plus a heap that records how recently each one was used
+ {
+ typedef boost::unordered_map<uint64_t, TPCollWrapper*> cache_t;
+ typedef std::vector<TPCollWrapper*> history_t;
+ cache_t m_cache; // maps from phrase ids to target phrase collections
+ history_t m_history; // heap of live items, least recently used one on top
+
+ boost::shared_mutex m_cache_lock; // locks m_cache
+ boost::shared_mutex m_history_lock; // locks m_history
+
+#if 0
+ // mutable size_t m_tpc_ctr;
+ // counter of all live items, for debugging. probably obsolete; was used
+ // to track memory leaks
+#endif
+
+ TPCollWrapper* encache(TPCollWrapper* const ptr);
+ // updates time stamp and position in least-recently-used heap m_history; returns ptr
+
+ public:
+ // TPCollCache() : m_tpc_ctr(0) { }
+
+ TPCollWrapper*
+ get(uint64_t key, size_t revision) const; // look up the entry cached under key for the given corpus revision
+
+ void
+ add(uint64_t key, TPCollWrapper* ptr); // store ptr under key, take a reference on it and record it as just used
+
+ void
+ release(TargetPhraseCollection const*& tpc); // give back one reference; deletes the wrapper once it is unreferenced and no longer in the history heap
+ };
+
+
+}