Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-15 16:58:33 +0400
committerKenneth Heafield <github@kheafield.com>2012-10-15 16:58:33 +0400
commit0eb98df0fe32a8e31188fa107f789afd73d81ccf (patch)
tree6b2f5bb5a21e132f419da1fa7064f94eb74428e6 /util/probing_hash_table.hh
parented00315c8cf4c0b0f8ebc535cf676139b25ab087 (diff)
Submodules are evil. Undo it.
Diffstat (limited to 'util/probing_hash_table.hh')
-rw-r--r--util/probing_hash_table.hh144
1 files changed, 144 insertions, 0 deletions
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
new file mode 100644
index 000000000..770faa7e3
--- /dev/null
+++ b/util/probing_hash_table.hh
@@ -0,0 +1,144 @@
+#ifndef UTIL_PROBING_HASH_TABLE__
+#define UTIL_PROBING_HASH_TABLE__
+
+#include "util/exception.hh"
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+
+#include <assert.h>
+#include <inttypes.h>
+
+namespace util {
+
+/* Thrown when table grows too large */
+class ProbingSizeException : public Exception {
+ public:
+ ProbingSizeException() throw() {}
+ ~ProbingSizeException() throw() {}
+};
+
+// std::identity is an SGI extension :-(
+struct IdentityHash {
+ template <class T> T operator()(T arg) const { return arg; }
+};
+
+/* Non-standard hash table
+ * Buckets must be set at the beginning and must be greater than maximum number
+ * of elements, else it throws ProbingSizeException.
+ * Memory management and initialization is externalized to make it easier to
+ * serialize these to disk and load them quickly.
+ * Uses linear probing to find value.
+ * Only insert and lookup operations.
+ */
+
+template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class ProbingHashTable {
+ public:
+ typedef EntryT Entry;
+ typedef typename Entry::Key Key;
+ typedef const Entry *ConstIterator;
+ typedef Entry *MutableIterator;
+ typedef HashT Hash;
+ typedef EqualT Equal;
+
+ public:
+ static uint64_t Size(uint64_t entries, float multiplier) {
+ uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
+ return buckets * sizeof(Entry);
+ }
+
+ // Must be assigned to later.
+ ProbingHashTable() : entries_(0)
+#ifdef DEBUG
+ , initialized_(false)
+#endif
+ {}
+
+ ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
+ : begin_(reinterpret_cast<MutableIterator>(start)),
+ buckets_(allocated / sizeof(Entry)),
+ end_(begin_ + buckets_),
+ invalid_(invalid),
+ hash_(hash_func),
+ equal_(equal_func),
+ entries_(0)
+#ifdef DEBUG
+ , initialized_(true)
+#endif
+ {}
+
+ template <class T> MutableIterator Insert(const T &t) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
+ for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
+ if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
+ if (++i == end_) { i = begin_; }
+ }
+ }
+
+ // Return true if the value was found (and not inserted). This is consistent with Find but the opposite if hash_map!
+ template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, t.GetKey())) { out = i; return true; }
+ if (equal_(got, invalid_)) {
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
+ *i = t;
+ out = i;
+ return false;
+ }
+ if (++i == end_) i = begin_;
+ }
+ }
+
+ void FinishedInserting() {}
+
+ void LoadedBinary() {}
+
+ // Don't change anything related to GetKey,
+ template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { out = i; return true; }
+ if (equal_(got, invalid_)) return false;
+ if (++i == end_) i = begin_;
+ }
+ }
+
+ template <class Key> bool Find(const Key key, ConstIterator &out) const {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { out = i; return true; }
+ if (equal_(got, invalid_)) return false;
+ if (++i == end_) i = begin_;
+ }
+ }
+
+ private:
+ MutableIterator begin_;
+ std::size_t buckets_;
+ MutableIterator end_;
+ Key invalid_;
+ Hash hash_;
+ Equal equal_;
+ std::size_t entries_;
+#ifdef DEBUG
+ bool initialized_;
+#endif
+};
+
+} // namespace util
+
+#endif // UTIL_PROBING_HASH_TABLE__