From 7b64d42d22206d9995a8f0cb3b515e623cac4702 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 3 Jul 2014 00:22:54 +0200 Subject: hashmap: add string interning API Interning short strings with high probability of duplicates can reduce the memory footprint and speed up comparisons. Add strintern() and memintern() APIs that use a hashmap to manage the pool of unique, interned strings. Note: strintern(getenv()) could be used to sanitize git's use of getenv(), in case we ever encounter a platform where a call to getenv() invalidates previous getenv() results (which is allowed by POSIX). Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- hashmap.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'hashmap.c') diff --git a/hashmap.c b/hashmap.c index d1b8056d8d..f693839cb4 100644 --- a/hashmap.c +++ b/hashmap.c @@ -226,3 +226,41 @@ void *hashmap_iter_next(struct hashmap_iter *iter) current = iter->map->table[iter->tablepos++]; } } + +struct pool_entry { + struct hashmap_entry ent; + size_t len; + unsigned char data[FLEX_ARRAY]; +}; + +static int pool_entry_cmp(const struct pool_entry *e1, + const struct pool_entry *e2, + const unsigned char *keydata) +{ + return e1->data != keydata && + (e1->len != e2->len || memcmp(e1->data, keydata, e1->len)); +} + +const void *memintern(const void *data, size_t len) +{ + static struct hashmap map; + struct pool_entry key, *e; + + /* initialize string pool hashmap */ + if (!map.tablesize) + hashmap_init(&map, (hashmap_cmp_fn) pool_entry_cmp, 0); + + /* lookup interned string in pool */ + hashmap_entry_init(&key, memhash(data, len)); + key.len = len; + e = hashmap_get(&map, &key, data); + if (!e) { + /* not found: create it */ + e = xmallocz(sizeof(struct pool_entry) + len); + hashmap_entry_init(e, key.ent.hash); + e->len = len; + memcpy(e->data, data, len); + hashmap_add(&map, e); + } + return e->data; +} -- cgit v1.2.3