diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-15 16:58:33 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-15 16:58:33 +0400 |
commit | 0eb98df0fe32a8e31188fa107f789afd73d81ccf (patch) | |
tree | 6b2f5bb5a21e132f419da1fa7064f94eb74428e6 /util/probing_hash_table.hh | |
parent | ed00315c8cf4c0b0f8ebc535cf676139b25ab087 (diff) |
Submodules are evil. Undo it.
Diffstat (limited to 'util/probing_hash_table.hh')
-rw-r--r-- | util/probing_hash_table.hh | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh new file mode 100644 index 000000000..770faa7e3 --- /dev/null +++ b/util/probing_hash_table.hh @@ -0,0 +1,144 @@ +#ifndef UTIL_PROBING_HASH_TABLE__ +#define UTIL_PROBING_HASH_TABLE__ + +#include "util/exception.hh" + +#include <algorithm> +#include <cstddef> +#include <functional> + +#include <assert.h> +#include <inttypes.h> + +namespace util { + +/* Thrown when table grows too large */ +class ProbingSizeException : public Exception { + public: + ProbingSizeException() throw() {} + ~ProbingSizeException() throw() {} +}; + +// std::identity is an SGI extension :-( +struct IdentityHash { + template <class T> T operator()(T arg) const { return arg; } +}; + +/* Non-standard hash table + * Buckets must be set at the beginning and must be greater than maximum number + * of elements, else it throws ProbingSizeException. + * Memory management and initialization is externalized to make it easier to + * serialize these to disk and load them quickly. + * Uses linear probing to find value. + * Only insert and lookup operations. + */ + +template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class ProbingHashTable { + public: + typedef EntryT Entry; + typedef typename Entry::Key Key; + typedef const Entry *ConstIterator; + typedef Entry *MutableIterator; + typedef HashT Hash; + typedef EqualT Equal; + + public: + static uint64_t Size(uint64_t entries, float multiplier) { + uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries))); + return buckets * sizeof(Entry); + } + + // Must be assigned to later. + ProbingHashTable() : entries_(0) +#ifdef DEBUG + , initialized_(false) +#endif + {} + + ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) + : begin_(reinterpret_cast<MutableIterator>(start)), + buckets_(allocated / sizeof(Entry)), + end_(begin_ + buckets_), + invalid_(invalid), + hash_(hash_func), + equal_(equal_func), + entries_(0) +#ifdef DEBUG + , initialized_(true) +#endif + {} + + template <class T> MutableIterator Insert(const T &t) { +#ifdef DEBUG + assert(initialized_); +#endif + UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); + for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { + if (equal_(i->GetKey(), invalid_)) { *i = t; return i; } + if (++i == end_) { i = begin_; } + } + } + + // Return true if the value was found (and not inserted). This is consistent with Find but the opposite if hash_map! + template <class T> bool FindOrInsert(const T &t, MutableIterator &out) { +#ifdef DEBUG + assert(initialized_); +#endif + for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, t.GetKey())) { out = i; return true; } + if (equal_(got, invalid_)) { + UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); + *i = t; + out = i; + return false; + } + if (++i == end_) i = begin_; + } + } + + void FinishedInserting() {} + + void LoadedBinary() {} + + // Don't change anything related to GetKey, + template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) { +#ifdef DEBUG + assert(initialized_); +#endif + for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, key)) { out = i; return true; } + if (equal_(got, invalid_)) return false; + if (++i == end_) i = begin_; + } + } + + template <class Key> bool Find(const Key key, ConstIterator &out) const { +#ifdef DEBUG + assert(initialized_); +#endif + for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, key)) { out = i; return true; } + if (equal_(got, invalid_)) return false; + if (++i == end_) i = begin_; + } + } + + private: + MutableIterator begin_; + std::size_t buckets_; + MutableIterator end_; + Key invalid_; + Hash hash_; + Equal equal_; + std::size_t entries_; +#ifdef DEBUG + bool initialized_; +#endif +}; + +} // namespace util + +#endif // UTIL_PROBING_HASH_TABLE__ |