/** * uhtbl - Generic coalesced hash table implementation * Copyright (C) 2010 Steven Barth * Copyright (C) 2010 John Crispin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. * */ /** * uhtbl is a coalesced cellared generic hash table implementation with the aim * to be both small in code size and heap memory requirements. This hash table * uses a hybrid approach called coalesced addressing which means that pointers * to other buckets will be used in the case of a collisions. In this case no * linked lists have to be used and less allocation calls have to be done. * To improve performance this hash table carries a cellar for collision * handling which is an additional address area that lies behind the * hash-addressable space. * * Overhead (on x86 32bit): * Bookkeeping: 32 Bytes (per hash table) * Metadata: 12 Bytes including pointer to key and keysize (per bucket) * */ #ifndef UHTBL_H_ #define UHTBL_H_ /* compile-time configurables */ #ifndef UHTBL_PAYLOADFACTOR #define UHTBL_PAYLOADFACTOR 0.86 #endif #ifndef UHTBL_GROWFACTOR #define UHTBL_GROWFACTOR 2 #endif #ifndef UHTBL_MINIMUMSIZE #define UHTBL_MINIMUMSIZE 16 #endif #include /* Internal flags and values */ #define UHTBL_FLAG_OCCUPIED 0x01 #define UHTBL_FLAG_STRANGER 0x02 #define UHTBL_FLAG_WITHNEXT 0x04 #define UHTBL_FLAG_LOCALKEY 0x08 #define UHTBL_MAXIMUMSIZE 2147483648 /* Status codes */ #define UHTBL_OK 0 #define UHTBL_EINVAL -1 #define UHTBL_ENOMEM -2 #define UHTBL_ENOENT -3 /* API */ #if __GNUC__ >= 4 #ifndef UHTBL_API #define UHTBL_API #endif #define UHTBL_INLINE static inline __attribute__((always_inline)) #else #ifndef UHTBL_API #define UHTBL_API #endif #define UHTBL_INLINE static inline #endif typedef union uhtbl_key uhtbl_key_t; typedef struct uhtbl_head uhtbl_head_t; typedef struct uhtbl_bucket uhtbl_bucket_t; typedef struct uhtbl_config uhtbl_config_t; typedef struct uhtbl uhtbl_t; typedef uint32_t uhtbl_size_t; typedef uhtbl_size_t(uhtbl_hash_t)(const void*, int len); typedef void(uhtbl_gc_t)(void *bucket); union uhtbl_key { void *ptr; long handle; }; struct uhtbl_head { uint8_t user; uint8_t flags; uint16_t keysize; uhtbl_size_t next; uhtbl_key_t key; }; struct uhtbl_bucket { uhtbl_head_t head; }; struct uhtbl { uint32_t bucketsize; uhtbl_size_t size; uhtbl_size_t used; uhtbl_size_t payload; uhtbl_size_t nextfree; uhtbl_hash_t *fct_hash; uhtbl_gc_t *fct_gc; void *buckets; }; /** * uhtbl_init() - Initialize a hash table. * @tbl: hash table * @bucketsize: size of a bucket * @sizehint: estimated maximum of needed buckets (optional) * @fct_hash: hash function * @fct_gc: bucket garbage collector (optional) * * Initializes a new hash table and preallocates memory. * * bucketsize is the size in Bytes each bucket will use but note the following: * Each bucket needs to begin with a struct uhtbl_head_t that keeps its metadata * in addition to the payload you want it to carry. You are advised to define a * bucket struct with the first element being a uhtbl_head_t followed by your * desired payload and pass the size of this struct to bucketsize. * * sizehint is a hint on how many distinct entries will be stored in the hash * table. This will be used to preallocate space for the buckets and is useful * if you know how many entries will be stored in the hash table as it avoids * expensive rehashing cycles. sizehint should be a power of 2. * * fct_hash is the hash function used. It takes a constant void pointer and a * integer as size parameter and returns an unsigned (32bit) int. * * fct_gc is the garbage collector for buckets. Every time a bucket gets unset * or the hash table gets cleared or finalized the garbage collector function * taking a pointer to a bucket will take care of doing any finalization for * the buckets' payload and key data. You may use uhtbl_key() to get a reference * to your key pointer or handle for deallocation or cleaning up any other * references. There is an optionally selectable garbage collector that will * take care of free()ing key pointers if your keys point to memory areas. * You have to pass uhtbl_gc_key as fct_gc parameter to use it. You may also * call this function in your custom garbage collector. * * WARNING: Your garbage collector function must otherwise not change the * metadata in the uhtbl_head_t structure of the bucket else behaviour will be * undefined for all subsequent actions. * * * Example: * struct mybucket { * uhtbl_head_t head; * int mypayload1; * int mypayload2; * } * * uhtbl_t table; * uhtbl_init(&table, sizeof(struct mybucket), 32, MurmurHash2, NULL); * * Returns 0 on success or a negative error code. */ UHTBL_API int uhtbl_init(uhtbl_t *tbl, uint32_t bucketsize, uhtbl_size_t sizehint, uhtbl_hash_t *fct_hash, uhtbl_gc_t *fct_gc); /** * uhtbl_get() - Get a bucket by its key. * @tbl: hash table * @key: key * @len: length of key * * Finds and returns the bucket with a given key. * * Key can either be: * 1. A pointer to a memory area then len is its length (must be < 64KB) * 2. A NULL-pointer then len is a locally stored numerical key * * * Example: * struct mybucket *bucket; * bucket = uhtbl_get(table, "foo", sizeof("foo")); * printf("%i", bucket->mypayload1); * * bucket = uhtbl_get(table, NULL, 42); * printf("%i", bucket->mypayload1); * * Returns the bucket or NULL if no bucket with given key was found. */ UHTBL_API void* uhtbl_get(uhtbl_t *tbl, const void *key, long len); /** * uhtbl_set() - Sets a bucket for given key. * @tbl: hash table * @key: key * @len: length of key * * Sets a new bucket for the given key and returns a pointer to the bucket for * you to assign your payload data. If there is already a bucket with that key * it will be unset first. * * Key can either be: * 1. A pointer to a memory area then len is its length (must be < 64KB) * 2. A NULL-pointer then len is a locally stored numerical key * * NOTE: If key is a pointer memory management of it will be your business. * You might want to use a garbage collection function (see uhtbl_init()) * * NOTE: The payload area of your bucket is NOT initialized to zeroes. * * WARNING: Note the following side effects when setting previously unset keys: * 1. A set may trigger several moving actions changing the order of buckets. * 2. A set may trigger a rehashing cycle if all buckets are occupied. * Therefore accessing any previously acquired pointers to any bucket results in * undefined behaviour. In addition iterations which have started before may * result in unwanted behaviour (e.g. buckets may be skipped or visited twice). * * * Example: * struct mybucket *bucket; * bucket = uhtbl_set(table, "foo", sizeof("foo")); * bucket->mypayload1 = 42: * * bucket = uhtbl_set(table, NULL, 42); * bucket->mypayload1 = 1337; * * * Returns the bucket or NULL if no bucket could be allocated (out of memory). */ UHTBL_API void* uhtbl_set(uhtbl_t *tbl, void *key, long len); /** * uhtbl_next() - Iterates over all entries of the hash table. * @tbl: hash table * @iter: Iteration counter * * Iterates over all entries of the hash table. * * iter is a pointer to a numeric variable that should be set to zero before * the first call and will save the iteration state. * * NOTE: You may safely do several iterations in parallel. You may also safely * unset any buckets of the hashtable or set keys that are currently in the * hash table. However setting buckets with keys that don't have an assigned * bucket yet results in undefined behaviour. * * Example: * uint32_t iter = 0; * struct mybucket *bucket; * while ((bucket = uhtbl_next(table, &iter))) { * printf("%i", bucket->mypayload1); * } * * Return the next bucket or NULL if all buckets were already visited. */ UHTBL_API void* uhtbl_next(uhtbl_t *tbl, uhtbl_size_t *iter); /** * uhtbl_unset() - Unsets the bucket with given key. * @tbl: hash table * @key: key * @len: length of key (optional) * * Unsets the bucket with given key and calls the garbage collector to free * any payload resources - if any. * * Key can either be: * 1. A pointer to a memory area then len is its length (must be < 64KB) * 2. A NULL-pointer then len is a locally stored numerical key * * Example: * uhtbl_unset(table, NULL, 42); * * Returns 0 on success or a negative error code if there was no matching bucket */ UHTBL_API int uhtbl_unset(uhtbl_t *tbl, const void *key, long len); /** * uhtbl_remove() - Unsets a bucket. * @tbl: hash table * @head: bucket head * * Unsets the bucket with given address and calls the garbage collector to free * any payload resources - if any. * * Example: * uhtbl_remove(table, &bucket->head); * * Returns 0 on success or a negative error code if the bucket was not found */ UHTBL_API int uhtbl_remove(uhtbl_t *tbl, uhtbl_head_t *head); /** * uhtbl_clear() - Clears the hashtable without freeing its memory. * @tbl: hash table * * Clears all buckets of the hashtable invoking the garbage collector - if any * but does not free the memory of the hash table. This is usually more * efficient than iterating and using unset. * * Returns nothing. */ UHTBL_API void uhtbl_clear(uhtbl_t *tbl); /** * uhtbl_resize() - Resizes and rehashes the hash table. * @tbl: hash table * @payload: Buckets to reserve. * * Resizes the hash table and rehashes its entries. * * payload is the number of buckets the hashtable should allocate. It must be * greater or at least equal to the number of buckets currently occupied. * * NOTE: Rehashing is an expensive process which should be avoided if possible. * However resizing will be automatically done if you try to set a new bucket * but all buckets are already occupied. In this case the payload bucket count * is usually doubled. There is currently no automatic resizing done when the * bucket usage count decreases. You have to take care of this by yourself. * * Returns 0 on success or a negative error code if out of memory. */ UHTBL_API int uhtbl_resize(uhtbl_t *tbl, uhtbl_size_t payload); /** * uhtbl_clear() - Clears the hashtable and frees the bucket memory. * @tbl: hash table * * Clears all buckets of the hashtable invoking the garbage collector - if any * and frees the memory occupied by the buckets. * * Returns nothing. */ UHTBL_API void uhtbl_finalize(uhtbl_t *tbl); /** * uhtbl_key() - Returns the key parameters as passed to set. * @head: Bucket head * @key: Pointer where key pointer should be stored (optional) * @len: Pointer where key len should be stored (optional) * * This function might be useful to obtain the key parameters of a bucket * when doing garbage collection. * * Returns nothing. */ UHTBL_API void uhtbl_key(uhtbl_head_t *head, void **key, long *len); /** * uhtbl_gc_key() - Basic garbage collector that frees key memory. * @bucket: Bucket * * This function is a basic garbage collector that will free any key pointers. * However it will not touch your payload data. * * WARNING: You must not call this function directly on any bucket otherwise * behaviour will be unspecified. Instead you may pass this function to the * uhtbl_init function. You may also call this function from inside a custom * garbage collector. * * Returns nothing. */ UHTBL_API void uhtbl_gc_key(void *bucket); #endif /* UHTBL_H_ */