From: Ondřej Surý Date: Fri, 24 Jun 2022 06:32:12 +0000 (+0200) Subject: Add isc_hashmap API that implements Robin Hood hashing X-Git-Tag: v9.19.8~68^2~3 X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=f46ce447a676ea5ba938934d20d310aec9eef7f5;p=thirdparty%2Fbind9.git Add isc_hashmap API that implements Robin Hood hashing Add new isc_hashmap API that differs from the current isc_ht API in several aspects: 1. It implements Robin Hood Hashing which is open-addressing hash table algorithm (e.g. no linked-lists) 2. No memory allocations - the array to store the nodes is made of isc_hashmap_node_t structures instead of just pointers, so there's only allocation on resize. 3. The key is not copied into the hashmap node and must be also stored externally, either as part of the stored value or in any other location that's valid as long the value is stored in the hashmap. This makes the isc_hashmap_t a little less universal because of the key storage requirements, but the inserts and deletes are faster because they don't require memory allocation on isc_hashmap_add() and memory deallocation on isc_hashmap_delete(). --- diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 4ff086b4203..2d8976a3633 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -37,6 +37,7 @@ libisc_la_HEADERS = \ include/isc/fuzz.h \ include/isc/glob.h \ include/isc/hash.h \ + include/isc/hashmap.h \ include/isc/heap.h \ include/isc/hex.h \ include/isc/hmac.h \ @@ -145,6 +146,7 @@ libisc_la_SOURCES = \ fsaccess_common_p.h \ glob.c \ hash.c \ + hashmap.c \ heap.c \ hex.c \ hmac.c \ diff --git a/lib/isc/hashmap.c b/lib/isc/hashmap.c new file mode 100644 index 00000000000..3526774cadd --- /dev/null +++ b/lib/isc/hashmap.c @@ -0,0 +1,749 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* + * This is an implementation of the Robin Hood hash table algorithm as + * described in [a] with simple linear searching, and backwards shift + * deletion algorithm as described in [b] and [c]. + * + * Further work: + * 1. Implement 4.1 Speeding up Searches - 4.4 Smart Search [a] + * 2. Implement A Fast Concurrent and Resizable Robin Hood Hash Table [b] + * + * a. https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf paper. + * b. https://dspace.mit.edu/bitstream/handle/1721.1/130693/1251799942-MIT.pdf + * c. + * https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APPROX_99_PERCENT(x) (((x)*1013) >> 10) +#define APPROX_95_PERCENT(x) (((x)*972) >> 10) +#define APPROX_90_PERCENT(x) (((x)*921) >> 10) +#define APPROX_85_PERCENT(x) (((x)*870) >> 10) +#define APPROX_40_PERCENT(x) (((x)*409) >> 10) +#define APPROX_35_PERCENT(x) (((x)*359) >> 10) +#define APPROX_30_PERCENT(x) (((x)*308) >> 10) +#define APPROX_25_PERCENT(x) (((x)*256) >> 10) +#define APPROX_20_PERCENT(x) (((x)*205) >> 10) +#define APPROX_15_PERCENT(x) (((x)*154) >> 10) +#define APPROX_10_PERCENT(x) (((x)*103) >> 10) +#define APPROX_05_PERCENT(x) (((x)*52) >> 10) +#define APPROX_01_PERCENT(x) (((x)*11) >> 10) + +#define ISC_HASHMAP_MAGIC ISC_MAGIC('H', 'M', 'a', 'p') +#define ISC_HASHMAP_VALID(hashmap) ISC_MAGIC_VALID(hashmap, ISC_HASHMAP_MAGIC) + +/* We have two tables for incremental rehashing */ +#define HASHMAP_NUM_TABLES 2 + +#define HASHSIZE(bits) (UINT64_C(1) << (bits)) + +#define HASHMAP_NO_BITS 0U +#define HASHMAP_MIN_BITS 1U +#define HASHMAP_MAX_BITS 32U + +typedef struct hashmap_node { + const uint8_t *key; + void *value; + uint32_t hashval; + uint32_t psl; + uint16_t keysize; +} hashmap_node_t; + +typedef struct hashmap_table { + size_t size; + uint8_t hashbits; + uint32_t hashmask; + hashmap_node_t *table; +} hashmap_table_t; + +struct isc_hashmap { + unsigned int magic; + bool case_sensitive; + uint8_t hindex; + uint32_t hiter; /* rehashing iterator */ + isc_mem_t *mctx; + size_t count; + uint8_t hash_key[16]; + hashmap_table_t tables[HASHMAP_NUM_TABLES]; +}; + +struct isc_hashmap_iter { + isc_hashmap_t *hashmap; + size_t i; + uint8_t hindex; + hashmap_node_t *cur; +}; + +static isc_result_t +hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, void *value, uint8_t idx); + +static void +hashmap_rehash_one(isc_hashmap_t *hashmap); +static void +hashmap_rehash_start_grow(isc_hashmap_t *hashmap); +static void +hashmap_rehash_start_shrink(isc_hashmap_t *hashmap); +static bool +over_threshold(isc_hashmap_t *hashmap); +static bool +under_threshold(isc_hashmap_t *hashmap); + +static uint8_t +hashmap_nexttable(uint8_t idx) { + return ((idx == 0) ? 1 : 0); +} + +static bool +rehashing_in_progress(const isc_hashmap_t *hashmap) { + return (hashmap->tables[hashmap_nexttable(hashmap->hindex)].table != + NULL); +} + +static bool +try_nexttable(const isc_hashmap_t *hashmap, uint8_t idx) { + return (idx == hashmap->hindex && rehashing_in_progress(hashmap)); +} + +static void +hashmap_node_init(hashmap_node_t *node, const uint32_t hashval, + const uint8_t *key, const uint32_t keysize, void *value) { + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + *node = (hashmap_node_t){ + .value = value, + .hashval = hashval, + .key = key, + .keysize = keysize, + .psl = 0, + }; +} + +static void __attribute__((__unused__)) +hashmap_dump_table(const isc_hashmap_t *hashmap, const uint8_t idx) { + fprintf(stderr, + "====== %" PRIu8 " (bits = %" PRIu8 ", size = %zu =====\n", idx, + hashmap->tables[idx].hashbits, hashmap->tables[idx].size); + for (size_t i = 0; i < hashmap->tables[idx].size; i++) { + hashmap_node_t *node = &hashmap->tables[idx].table[i]; + if (node->key != NULL) { + uint32_t hash = isc_hash_bits32( + node->hashval, hashmap->tables[idx].hashbits); + fprintf(stderr, + "%p: %zu -> %p" + ", value = %p" + ", hash = %" PRIu32 ", hashval = %" PRIu32 + ", psl = %" PRIu32 ", key = %s\n", + hashmap, i, node, node->value, hash, + node->hashval, node->psl, (char *)node->key); + } + } + fprintf(stderr, "================\n\n"); +} + +static void +hashmap_create_table(isc_hashmap_t *hashmap, const uint8_t idx, + const uint8_t bits) { + size_t size; + + REQUIRE(hashmap->tables[idx].hashbits == HASHMAP_NO_BITS); + REQUIRE(hashmap->tables[idx].table == NULL); + REQUIRE(bits >= HASHMAP_MIN_BITS); + REQUIRE(bits <= HASHMAP_MAX_BITS); + + hashmap->tables[idx] = (hashmap_table_t){ + .hashbits = bits, + .hashmask = HASHSIZE(bits) - 1, + .size = HASHSIZE(bits), + }; + + size = hashmap->tables[idx].size * + sizeof(hashmap->tables[idx].table[0]); + + hashmap->tables[idx].table = isc_mem_getx(hashmap->mctx, size, + ISC_MEM_ZERO); +} + +static void +hashmap_free_table(isc_hashmap_t *hashmap, const uint8_t idx, bool cleanup) { + size_t size; + + if (cleanup) { + for (size_t i = 0; i < hashmap->tables[idx].size; i++) { + hashmap_node_t *node = &hashmap->tables[idx].table[i]; + if (node->key != NULL) { + *node = (hashmap_node_t){ 0 }; + hashmap->count--; + } + } + } + + size = hashmap->tables[idx].size * + sizeof(hashmap->tables[idx].table[0]); + isc_mem_put(hashmap->mctx, hashmap->tables[idx].table, size); + + hashmap->tables[idx] = (hashmap_table_t){ + .hashbits = HASHMAP_NO_BITS, + }; +} + +void +isc_hashmap_create(isc_mem_t *mctx, uint8_t bits, unsigned int options, + isc_hashmap_t **hashmapp) { + isc_hashmap_t *hashmap = isc_mem_get(mctx, sizeof(*hashmap)); + bool case_sensitive = ((options & ISC_HASHMAP_CASE_INSENSITIVE) == 0); + + REQUIRE(hashmapp != NULL && *hashmapp == NULL); + REQUIRE(mctx != NULL); + REQUIRE(bits >= HASHMAP_MIN_BITS && bits <= HASHMAP_MAX_BITS); + + *hashmap = (isc_hashmap_t){ + .magic = ISC_HASHMAP_MAGIC, + .hash_key = { 0, 1 }, + .case_sensitive = case_sensitive, + }; + isc_mem_attach(mctx, &hashmap->mctx); + +#if !defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && !defined(UNIT_TESTING) + isc_entropy_get(hashmap->hash_key, sizeof(hashmap->hash_key)); +#endif /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */ + + hashmap_create_table(hashmap, 0, bits); + + hashmap->magic = ISC_HASHMAP_MAGIC; + + *hashmapp = hashmap; +} + +void +isc_hashmap_destroy(isc_hashmap_t **hashmapp) { + isc_hashmap_t *hashmap; + + REQUIRE(hashmapp != NULL && *hashmapp != NULL); + REQUIRE(ISC_HASHMAP_VALID(*hashmapp)); + + hashmap = *hashmapp; + *hashmapp = NULL; + + hashmap->magic = 0; + + for (size_t i = 0; i < HASHMAP_NUM_TABLES; i++) { + if (hashmap->tables[i].table != NULL) { + hashmap_free_table(hashmap, i, true); + } + } + INSIST(hashmap->count == 0); + + isc_mem_putanddetach(&hashmap->mctx, hashmap, sizeof(*hashmap)); +} + +static bool +hashmap_match(hashmap_node_t *node, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, const bool case_sensitive) { + return (node->hashval == hashval && node->keysize == keysize && + (case_sensitive + ? (memcmp(node->key, key, keysize) == 0) + : (isc_ascii_lowerequal(node->key, key, keysize)))); +} + +static hashmap_node_t * +hashmap_find(const isc_hashmap_t *hashmap, const uint32_t hashval, + const uint8_t *key, uint32_t keysize, uint32_t *pslp, + uint8_t *idxp) { + uint32_t hash; + uint32_t psl; + uint8_t idx = *idxp; + uint32_t pos; + +nexttable: + psl = 0; + hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits); + + while (true) { + hashmap_node_t *node = NULL; + + pos = (hash + psl) & hashmap->tables[idx].hashmask; + + node = &hashmap->tables[idx].table[pos]; + + if (node->key == NULL || psl > node->psl) { + break; + } + + if (hashmap_match(node, hashval, key, keysize, + hashmap->case_sensitive)) { + *pslp = psl; + *idxp = idx; + return (node); + } + + psl++; + } + if (try_nexttable(hashmap, idx)) { + idx = hashmap_nexttable(idx); + goto nexttable; + } + + return (NULL); +} + +uint32_t +isc_hashmap_hash(const isc_hashmap_t *hashmap, const void *key, + uint32_t keysize) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + uint32_t hashval; + + isc_halfsiphash24(hashmap->hash_key, key, keysize, + hashmap->case_sensitive, (uint8_t *)&hashval); + + return (hashval); +} + +isc_result_t +isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void **valuep) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + hashmap_node_t *node; + uint8_t idx = hashmap->hindex; + uint32_t hashval = (hashvalp != NULL) + ? *hashvalp + : isc_hashmap_hash(hashmap, key, keysize); + + node = hashmap_find(hashmap, hashval, key, keysize, &(uint32_t){ 0 }, + &idx); + if (node == NULL) { + return (ISC_R_NOTFOUND); + } + + INSIST(node->key != NULL); + if (valuep != NULL) { + *valuep = node->value; + } + return (ISC_R_SUCCESS); +} + +static void +hashmap_delete_node(isc_hashmap_t *hashmap, hashmap_node_t *entry, + uint32_t hashval, uint32_t psl, const uint8_t idx) { + uint32_t pos; + uint32_t hash; + + hashmap->count--; + + hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits); + pos = hash + psl; + + while (true) { + hashmap_node_t *node = NULL; + + pos = (pos + 1) & hashmap->tables[idx].hashmask; + INSIST(pos < hashmap->tables[idx].size); + + node = &hashmap->tables[idx].table[pos]; + + if (node->key == NULL || node->psl == 0) { + break; + } + + node->psl--; + *entry = *node; + entry = &hashmap->tables[idx].table[pos]; + } + + *entry = (hashmap_node_t){ 0 }; +} + +static void +hashmap_rehash_one(isc_hashmap_t *hashmap) { + uint8_t oldidx = hashmap_nexttable(hashmap->hindex); + uint32_t oldsize = hashmap->tables[oldidx].size; + hashmap_node_t *oldtable = hashmap->tables[oldidx].table; + hashmap_node_t node; + isc_result_t result; + + /* Find first non-empty node */ + while (hashmap->hiter < oldsize && oldtable[hashmap->hiter].key == NULL) + { + hashmap->hiter++; + } + + /* Rehashing complete */ + if (hashmap->hiter == oldsize) { + hashmap_free_table(hashmap, hashmap_nexttable(hashmap->hindex), + false); + hashmap->hiter = 0; + return; + } + + /* Move the first non-empty node from old table to new table */ + node = oldtable[hashmap->hiter]; + + hashmap_delete_node(hashmap, &oldtable[hashmap->hiter], node.hashval, + node.psl, oldidx); + + result = hashmap_add(hashmap, node.hashval, node.key, node.keysize, + node.value, hashmap->hindex); + INSIST(result == ISC_R_SUCCESS); + + /* + * we don't increase the hiter here because the table has been reordered + * when we deleted the old node + */ +} + +static uint32_t +grow_bits(isc_hashmap_t *hashmap) { + uint32_t newbits = hashmap->tables[hashmap->hindex].hashbits + 1; + size_t newsize = HASHSIZE(newbits); + + while (hashmap->count > APPROX_40_PERCENT(newsize)) { + newbits += 1; + newsize = HASHSIZE(newbits); + } + if (newbits > HASHMAP_MAX_BITS) { + newbits = HASHMAP_MAX_BITS; + } + + return (newbits); +} + +static uint32_t +shrink_bits(isc_hashmap_t *hashmap) { + uint32_t newbits = hashmap->tables[hashmap->hindex].hashbits - 1; + + if (newbits <= HASHMAP_MIN_BITS) { + newbits = HASHMAP_MIN_BITS; + } + + return (newbits); +} + +static void +hashmap_rehash_start_grow(isc_hashmap_t *hashmap) { + uint32_t newbits; + uint8_t oldindex = hashmap->hindex; + uint32_t oldbits = hashmap->tables[oldindex].hashbits; + uint8_t newindex = hashmap_nexttable(oldindex); + + REQUIRE(!rehashing_in_progress(hashmap)); + + newbits = grow_bits(hashmap); + + if (newbits > oldbits) { + hashmap_create_table(hashmap, newindex, newbits); + hashmap->hindex = newindex; + } +} + +static void +hashmap_rehash_start_shrink(isc_hashmap_t *hashmap) { + uint32_t newbits; + uint8_t oldindex = hashmap->hindex; + uint32_t oldbits = hashmap->tables[oldindex].hashbits; + uint8_t newindex = hashmap_nexttable(oldindex); + + REQUIRE(!rehashing_in_progress(hashmap)); + + newbits = shrink_bits(hashmap); + + if (newbits < oldbits) { + hashmap_create_table(hashmap, newindex, newbits); + hashmap->hindex = newindex; + } +} + +isc_result_t +isc_hashmap_delete(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + hashmap_node_t *node; + isc_result_t result = ISC_R_NOTFOUND; + uint32_t psl = 0; + uint8_t idx; + uint32_t hashval = (hashvalp != NULL) + ? *hashvalp + : isc_hashmap_hash(hashmap, key, keysize); + + if (rehashing_in_progress(hashmap)) { + hashmap_rehash_one(hashmap); + } else if (under_threshold(hashmap)) { + hashmap_rehash_start_shrink(hashmap); + hashmap_rehash_one(hashmap); + } + + /* Initialize idx after possible shrink start */ + idx = hashmap->hindex; + + node = hashmap_find(hashmap, hashval, key, keysize, &psl, &idx); + if (node != NULL) { + INSIST(node->key != NULL); + hashmap_delete_node(hashmap, node, hashval, psl, idx); + result = ISC_R_SUCCESS; + } + + return (result); +} + +static bool +over_threshold(isc_hashmap_t *hashmap) { + uint32_t bits = hashmap->tables[hashmap->hindex].hashbits; + if (bits == HASHMAP_MAX_BITS) { + return (false); + } + size_t threshold = APPROX_90_PERCENT(HASHSIZE(bits)); + return (hashmap->count > threshold); +} + +static bool +under_threshold(isc_hashmap_t *hashmap) { + uint32_t bits = hashmap->tables[hashmap->hindex].hashbits; + if (bits == HASHMAP_MIN_BITS) { + return (false); + } + size_t threshold = APPROX_20_PERCENT(HASHSIZE(bits)); + return (hashmap->count < threshold); +} + +static isc_result_t +hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, void *value, uint8_t idx) { + uint32_t hash; + uint32_t psl = 0; + hashmap_node_t node; + hashmap_node_t *current = NULL; + uint32_t pos; + + hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits); + + /* Initialize the node to be store to 'node' */ + hashmap_node_init(&node, hashval, key, keysize, value); + + psl = 0; + while (true) { + pos = (hash + psl) & hashmap->tables[idx].hashmask; + + current = &hashmap->tables[idx].table[pos]; + + /* Found an empty node */ + if (current->key == NULL) { + break; + } + + if (hashmap_match(current, hashval, key, keysize, + hashmap->case_sensitive)) { + return (ISC_R_EXISTS); + } + /* Found rich node */ + if (node.psl > current->psl) { + /* Swap the poor with the rich node */ + ISC_SWAP(*current, node); + } + + node.psl++; + psl++; + } + + /* + * Possible optimalization - start growing when the poor node is too far + */ +#if ISC_HASHMAP_GROW_FAST + if (psl > hashmap->hashbits[idx]) { + if (!rehashing_in_progress(hashmap)) { + hashmap_rehash_start_grow(hashmap); + } + } +#endif + + hashmap->count++; + + /* We found an empty place, store entry into current node */ + *current = node; + + return (ISC_R_SUCCESS); +} + +isc_result_t +isc_hashmap_add(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void *value) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + isc_result_t result; + uint32_t hashval = (hashvalp != NULL) + ? *hashvalp + : isc_hashmap_hash(hashmap, key, keysize); + + if (rehashing_in_progress(hashmap)) { + hashmap_rehash_one(hashmap); + } else if (over_threshold(hashmap)) { + hashmap_rehash_start_grow(hashmap); + hashmap_rehash_one(hashmap); + } + + if (rehashing_in_progress(hashmap)) { + uint8_t fidx = hashmap_nexttable(hashmap->hindex); + uint32_t psl; + + /* Look for the value in the old table */ + if (hashmap_find(hashmap, hashval, key, keysize, &psl, &fidx)) { + return (ISC_R_EXISTS); + } + } + + result = hashmap_add(hashmap, hashval, key, keysize, value, + hashmap->hindex); + switch (result) { + case ISC_R_SUCCESS: + case ISC_R_EXISTS: + return (result); + default: + UNREACHABLE(); + } +} + +void +isc_hashmap_iter_create(isc_hashmap_t *hashmap, isc_hashmap_iter_t **iterp) { + isc_hashmap_iter_t *iter; + + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(iterp != NULL && *iterp == NULL); + + iter = isc_mem_get(hashmap->mctx, sizeof(*iter)); + *iter = (isc_hashmap_iter_t){ + .hashmap = hashmap, + .hindex = hashmap->hindex, + }; + + *iterp = iter; +} + +void +isc_hashmap_iter_destroy(isc_hashmap_iter_t **iterp) { + isc_hashmap_iter_t *iter; + isc_hashmap_t *hashmap; + + REQUIRE(iterp != NULL && *iterp != NULL); + + iter = *iterp; + *iterp = NULL; + hashmap = iter->hashmap; + isc_mem_put(hashmap->mctx, iter, sizeof(*iter)); +} + +static isc_result_t +isc__hashmap_iter_next(isc_hashmap_iter_t *iter) { + isc_hashmap_t *hashmap = iter->hashmap; + + while (iter->i < hashmap->tables[iter->hindex].size && + hashmap->tables[iter->hindex].table[iter->i].key == NULL) + { + iter->i++; + } + + if (iter->i < hashmap->tables[iter->hindex].size) { + iter->cur = &hashmap->tables[iter->hindex].table[iter->i]; + + return (ISC_R_SUCCESS); + } + + if (try_nexttable(hashmap, iter->hindex)) { + iter->hindex = hashmap_nexttable(iter->hindex); + iter->i = 0; + return (isc__hashmap_iter_next(iter)); + } + + return (ISC_R_NOMORE); +} + +isc_result_t +isc_hashmap_iter_first(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + + iter->hindex = iter->hashmap->hindex; + iter->i = 0; + + return (isc__hashmap_iter_next(iter)); +} + +isc_result_t +isc_hashmap_iter_next(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + REQUIRE(iter->cur != NULL); + + iter->i++; + + return (isc__hashmap_iter_next(iter)); +} + +isc_result_t +isc_hashmap_iter_delcurrent_next(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + REQUIRE(iter->cur != NULL); + + hashmap_node_t *node = + &iter->hashmap->tables[iter->hindex].table[iter->i]; + + hashmap_delete_node(iter->hashmap, node, node->hashval, node->psl, + iter->hindex); + + return (isc__hashmap_iter_next(iter)); +} + +void +isc_hashmap_iter_current(isc_hashmap_iter_t *it, void **valuep) { + REQUIRE(it != NULL); + REQUIRE(it->cur != NULL); + REQUIRE(valuep != NULL && *valuep == NULL); + + *valuep = it->cur->value; +} + +void +isc_hashmap_iter_currentkey(isc_hashmap_iter_t *it, const unsigned char **key, + size_t *keysize) { + REQUIRE(it != NULL); + REQUIRE(it->cur != NULL); + REQUIRE(key != NULL && *key == NULL); + + *key = it->cur->key; + *keysize = it->cur->keysize; +} + +unsigned int +isc_hashmap_count(isc_hashmap_t *hashmap) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + + return (hashmap->count); +} diff --git a/lib/isc/include/isc/hashmap.h b/lib/isc/include/isc/hashmap.h new file mode 100644 index 00000000000..8af0c6b7e31 --- /dev/null +++ b/lib/isc/include/isc/hashmap.h @@ -0,0 +1,198 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* ! \file */ + +#pragma once + +#include +#include + +#include +#include + +typedef struct isc_hashmap isc_hashmap_t; +typedef struct isc_hashmap_iter isc_hashmap_iter_t; + +enum { ISC_HASHMAP_CASE_SENSITIVE = 0x00, ISC_HASHMAP_CASE_INSENSITIVE = 0x01 }; + +/*% + * Create hashmap at *hashmapp, using memory context and size of (1<=1 and 'bits' <=32 + * + */ +void +isc_hashmap_create(isc_mem_t *mctx, uint8_t bits, unsigned int options, + isc_hashmap_t **hashmapp); + +/*% + * Destroy hashmap, freeing everything + * + * Requires: + * \li '*hashmapp' is valid hashmap + */ +void +isc_hashmap_destroy(isc_hashmap_t **hashmapp); + +/*% + * Return current hashed value for 'key' of size 'keysize'; + */ +uint32_t +isc_hashmap_hash(const isc_hashmap_t *hashmap, const void *key, + uint32_t keysize); + +/*% + * Add a node to hashmap, pointed by binary key 'key' of size 'keysize'; + * set its value to 'value' + * + * Requires: + * \li 'hashmap' is a valid hashmap + * \li 'hashval' is optional precomputed hash value of 'key' + * \li 'key' is non-null key of size 'keysize' + * + * Returns: + * \li #ISC_R_EXISTS -- node of the same key already exists + * \li #ISC_R_SUCCESS -- all is well. + */ +isc_result_t +isc_hashmap_add(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void *value); + +/*% + * Find a node matching 'key'/'keysize' in hashmap 'hashmap'; + * if found, set '*valuep' to its value. (If 'valuep' is NULL, + * then simply return SUCCESS or NOTFOUND to indicate whether the + * key exists in the hashmap.) + * + * Requires: + * \li 'hashmap' is a valid hashmap + * \li 'hashval' is optional precomputed hash value of 'key' + * \li 'key' is non-null key of size 'keysize' + * + * Returns: + * \li #ISC_R_SUCCESS -- success + * \li #ISC_R_NOTFOUND -- key not found + */ +isc_result_t +isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void **valuep); + +/*% + * Delete node from hashmap + * + * Requires: + * \li 'hashmap' is a valid hashmap + * \li 'hashval' is optional precomputed hash value of 'key' + * \li 'key' is non-null key of size 'keysize' + * + * Returns: + * \li #ISC_R_NOTFOUND -- key not found + * \li #ISC_R_SUCCESS -- all is well + */ +isc_result_t +isc_hashmap_delete(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize); + +/*% + * Create an iterator for the hashmap; point '*itp' to it. + * + * Requires: + * \li 'hashmap' is a valid hashmap + * \li 'itp' is non NULL and '*itp' is NULL. + */ +void +isc_hashmap_iter_create(isc_hashmap_t *hashmap, isc_hashmap_iter_t **itp); + +/*% + * Destroy the iterator '*itp', set it to NULL + * + * Requires: + * \li 'itp' is non NULL and '*itp' is non NULL. + */ +void +isc_hashmap_iter_destroy(isc_hashmap_iter_t **itp); + +/*% + * Set an iterator to the first entry. + * + * Requires: + * \li 'it' is non NULL. + * + * Returns: + * \li #ISC_R_SUCCESS -- success + * \li #ISC_R_NOMORE -- no data in the hashmap + */ +isc_result_t +isc_hashmap_iter_first(isc_hashmap_iter_t *it); + +/*% + * Set an iterator to the next entry. + * + * Requires: + * \li 'it' is non NULL. + * + * Returns: + * \li #ISC_R_SUCCESS -- success + * \li #ISC_R_NOMORE -- end of hashmap reached + */ +isc_result_t +isc_hashmap_iter_next(isc_hashmap_iter_t *it); + +/*% + * Delete current entry and set an iterator to the next entry. + * + * Requires: + * \li 'it' is non NULL. + * + * Returns: + * \li #ISC_R_SUCCESS -- success + * \li #ISC_R_NOMORE -- end of hashmap reached + */ +isc_result_t +isc_hashmap_iter_delcurrent_next(isc_hashmap_iter_t *it); + +/*% + * Set 'value' to the current value under the iterator + * + * Requires: + * \li 'it' is non NULL. + * \li 'valuep' is non NULL and '*valuep' is NULL. + */ +void +isc_hashmap_iter_current(isc_hashmap_iter_t *it, void **valuep); + +/*% + * Set 'key' and 'keysize to the current key and keysize for the value + * under the iterator + * + * Requires: + * \li 'it' is non NULL. + * \li 'key' is non NULL and '*key' is NULL. + * \li 'keysize' is non NULL. + */ +void +isc_hashmap_iter_currentkey(isc_hashmap_iter_t *it, const unsigned char **key, + size_t *keysize); + +/*% + * Returns the number of items in the hashmap. + * + * Requires: + * \li 'hashmap' is a valid hashmap + */ +unsigned int +isc_hashmap_count(isc_hashmap_t *hashmap); diff --git a/tests/isc/Makefile.am b/tests/isc/Makefile.am index e6a5d46ac05..4a55ec2abac 100644 --- a/tests/isc/Makefile.am +++ b/tests/isc/Makefile.am @@ -19,6 +19,7 @@ check_PROGRAMS = \ errno_test \ file_test \ hash_test \ + hashmap_test \ heap_test \ hmac_test \ ht_test \ diff --git a/tests/isc/hashmap_test.c b/tests/isc/hashmap_test.c new file mode 100644 index 00000000000..334ed8d64f4 --- /dev/null +++ b/tests/isc/hashmap_test.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include +#include +#include +#include + +#include + +/* INCLUDE LAST */ + +#define mctx __mctx +#include "hashmap.c" +#undef mctx + +typedef struct test_node { + uint32_t hashval; + char key[64]; +} test_node_t; + +static void +test_hashmap_full(uint8_t init_bits, uintptr_t count) { + isc_hashmap_t *hashmap = NULL; + isc_result_t result; + test_node_t *nodes, *long_nodes, *upper_nodes; + + nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + long_nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + upper_nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + + isc_hashmap_create(mctx, init_bits, ISC_HASHMAP_CASE_SENSITIVE, + &hashmap); + assert_non_null(hashmap); + + /* + * Note: snprintf() is followed with strlcat() + * to ensure we are always filling the 16 byte key. + */ + for (size_t i = 0; i < count; i++) { + /* short keys */ + snprintf(nodes[i].key, 16, "%u", (unsigned int)i); + strlcat(nodes[i].key, " key of a raw hashmap!!", 16); + + /* long keys */ + snprintf(long_nodes[i].key, sizeof(long_nodes[i].key), "%u", + (unsigned int)i); + strlcat(long_nodes[i].key, " key of a raw hashmap!!", + sizeof(long_nodes[i].key)); + + /* (some) uppercase keys */ + snprintf(upper_nodes[i].key, 16, "%u", (unsigned int)i); + strlcat(upper_nodes[i].key, " KEY of a raw hashmap!!", 16); + } + + /* insert short nodes */ + for (size_t i = 0; i < count; i++) { + nodes[i].hashval = isc_hashmap_hash(hashmap, nodes[i].key, 16); + result = isc_hashmap_add(hashmap, &(nodes[i]).hashval, + nodes[i].key, 16, &nodes[i]); + assert_int_equal(result, ISC_R_SUCCESS); + } + + /* check if the short nodes were insert */ + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, &(nodes[i]).hashval, + nodes[i].key, 16, &f); + assert_int_equal(result, ISC_R_SUCCESS); + assert_ptr_equal(&nodes[i], f); + } + + /* check for double inserts */ + for (size_t i = 0; i < count; i++) { + result = isc_hashmap_add(hashmap, NULL, nodes[i].key, 16, + &nodes[i]); + assert_int_equal(result, ISC_R_EXISTS); + } + + for (size_t i = 0; i < count; i++) { + result = + isc_hashmap_add(hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key), + &long_nodes[i]); + assert_int_equal(result, ISC_R_SUCCESS); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, NULL, upper_nodes[i].key, 16, + &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find( + hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key), &f); + assert_int_equal(result, ISC_R_SUCCESS); + assert_ptr_equal(f, &long_nodes[i]); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_delete(hashmap, &nodes[i].hashval, + nodes[i].key, 16); + assert_int_equal(result, ISC_R_SUCCESS); + result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + for (size_t i = 0; i < count; i++) { + result = isc_hashmap_add(hashmap, NULL, upper_nodes[i].key, 16, + &upper_nodes[i]); + assert_int_equal(result, ISC_R_SUCCESS); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_delete( + hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key)); + assert_int_equal(result, ISC_R_SUCCESS); + result = isc_hashmap_find( + hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key), &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, NULL, upper_nodes[i].key, 16, + &f); + assert_int_equal(result, ISC_R_SUCCESS); + assert_ptr_equal(f, &upper_nodes[i]); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + isc_hashmap_destroy(&hashmap); + assert_null(hashmap); + + isc_mem_put(mctx, nodes, count * sizeof(nodes[0])); + isc_mem_put(mctx, long_nodes, count * sizeof(nodes[0])); + isc_mem_put(mctx, upper_nodes, count * sizeof(nodes[0])); +} + +static void +test_hashmap_iterator(void) { + isc_hashmap_t *hashmap = NULL; + isc_result_t result; + isc_hashmap_iter_t *iter = NULL; + size_t count = 7600; + uint32_t walked; + size_t tksize; + test_node_t *nodes; + + nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + + isc_hashmap_create(mctx, HASHMAP_MIN_BITS, ISC_HASHMAP_CASE_SENSITIVE, + &hashmap); + assert_non_null(hashmap); + + for (size_t i = 0; i < count; i++) { + /* short keys */ + snprintf(nodes[i].key, 16, "%u", (unsigned int)i); + strlcat(nodes[i].key, " key of a raw hashmap!!", 16); + } + + for (size_t i = 0; i < count; i++) { + result = isc_hashmap_add(hashmap, NULL, nodes[i].key, 16, + &nodes[i]); + assert_int_equal(result, ISC_R_SUCCESS); + } + + /* We want to iterate while rehashing is in progress */ + assert_true(rehashing_in_progress(hashmap)); + + walked = 0; + isc_hashmap_iter_create(hashmap, &iter); + + for (result = isc_hashmap_iter_first(iter); result == ISC_R_SUCCESS; + result = isc_hashmap_iter_next(iter)) + { + char key[16] = { 0 }; + ptrdiff_t i; + const uint8_t *tkey = NULL; + test_node_t *v = NULL; + + isc_hashmap_iter_current(iter, (void *)&v); + isc_hashmap_iter_currentkey(iter, &tkey, &tksize); + assert_int_equal(tksize, 16); + + i = v - &nodes[0]; + + snprintf(key, 16, "%u", (unsigned int)i); + strlcat(key, " key of a raw hashmap!!", 16); + + assert_memory_equal(key, tkey, 16); + + walked++; + } + assert_int_equal(walked, count); + assert_int_equal(result, ISC_R_NOMORE); + + /* erase odd */ + walked = 0; + result = isc_hashmap_iter_first(iter); + while (result == ISC_R_SUCCESS) { + char key[16] = { 0 }; + ptrdiff_t i; + const uint8_t *tkey = NULL; + test_node_t *v = NULL; + + isc_hashmap_iter_current(iter, (void *)&v); + isc_hashmap_iter_currentkey(iter, &tkey, &tksize); + assert_int_equal(tksize, 16); + + i = v - nodes; + snprintf(key, 16, "%u", (unsigned int)i); + strlcat(key, " key of a raw hashmap!!", 16); + assert_memory_equal(key, tkey, 16); + + if (i % 2 == 0) { + result = isc_hashmap_iter_delcurrent_next(iter); + } else { + result = isc_hashmap_iter_next(iter); + } + walked++; + } + assert_int_equal(result, ISC_R_NOMORE); + assert_int_equal(walked, count); + + /* erase even */ + walked = 0; + result = isc_hashmap_iter_first(iter); + while (result == ISC_R_SUCCESS) { + char key[16] = { 0 }; + ptrdiff_t i; + const uint8_t *tkey = NULL; + test_node_t *v = NULL; + + isc_hashmap_iter_current(iter, (void *)&v); + isc_hashmap_iter_currentkey(iter, &tkey, &tksize); + assert_int_equal(tksize, 16); + + i = v - nodes; + snprintf(key, 16, "%u", (unsigned int)i); + strlcat(key, " key of a raw hashmap!!", 16); + assert_memory_equal(key, tkey, 16); + + if (i % 2 == 1) { + result = isc_hashmap_iter_delcurrent_next(iter); + } else { + result = isc_hashmap_iter_next(iter); + } + walked++; + } + assert_int_equal(result, ISC_R_NOMORE); + assert_int_equal(walked, count / 2); + + walked = 0; + for (result = isc_hashmap_iter_first(iter); result == ISC_R_SUCCESS; + result = isc_hashmap_iter_next(iter)) + { + walked++; + } + + assert_int_equal(result, ISC_R_NOMORE); + assert_int_equal(walked, 0); + + /* Iterator doesn't progress rehashing */ + assert_true(rehashing_in_progress(hashmap)); + + isc_hashmap_iter_destroy(&iter); + assert_null(iter); + + isc_hashmap_destroy(&hashmap); + assert_null(hashmap); + + isc_mem_put(mctx, nodes, count * sizeof(nodes[0])); +} + +/* 1 bit, 120 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_1_120) { + test_hashmap_full(1, 120); + return; +} + +/* 6 bit, 1000 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_6_1000) { + test_hashmap_full(6, 1000); + return; +} + +/* 24 bit, 200K elements test, no rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_24_200000) { + test_hashmap_full(24, 200000); + return; +} + +/* 15 bit, 45K elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_1_48000) { + test_hashmap_full(1, 48000); + return; +} + +/* 8 bit, 20k elements test, partial rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_8_20000) { + test_hashmap_full(8, 20000); + return; +} + +/* test hashmap iterator */ + +ISC_RUN_TEST_IMPL(isc_hashmap_iterator) { + test_hashmap_iterator(); + return; +} + +ISC_RUN_TEST_IMPL(isc_hashmap_case) { + isc_result_t result; + isc_hashmap_t *hashmap = NULL; + test_node_t lower = { .key = "isc_hashmap_case" }; + test_node_t upper = { .key = "ISC_HASHMAP_CASE" }; + test_node_t mixed = { .key = "IsC_hAsHmAp_CaSe" }; + test_node_t *value; + + isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_SENSITIVE, &hashmap); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key), + &upper); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), + (void *)&value); + assert_int_equal(result, ISC_R_NOTFOUND); + + isc_hashmap_destroy(&hashmap); + + isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_INSENSITIVE, &hashmap); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key), + &upper); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), + (void *)&value); + assert_int_equal(result, ISC_R_SUCCESS); + + isc_hashmap_destroy(&hashmap); +} + +ISC_TEST_LIST_START +ISC_TEST_ENTRY(isc_hashmap_case) +ISC_TEST_ENTRY(isc_hashmap_1_120) +ISC_TEST_ENTRY(isc_hashmap_6_1000) +ISC_TEST_ENTRY(isc_hashmap_24_200000) +ISC_TEST_ENTRY(isc_hashmap_1_48000) +ISC_TEST_ENTRY(isc_hashmap_8_20000) +ISC_TEST_ENTRY(isc_hashmap_iterator) +ISC_TEST_LIST_END + +ISC_TEST_MAIN diff --git a/tests/isc/ht_test.c b/tests/isc/ht_test.c index 152994e47db..eec36f53c0d 100644 --- a/tests/isc/ht_test.c +++ b/tests/isc/ht_test.c @@ -39,7 +39,7 @@ #undef mctx static void -test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) { +test_ht_full(uint8_t init_bits, uintptr_t count) { isc_ht_t *ht = NULL; isc_result_t result; uintptr_t i; @@ -174,8 +174,6 @@ test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) { assert_null(f); } - assert_int_equal(ht->hashbits[ht->hindex], finish_bits); - isc_ht_destroy(&ht); assert_null(ht); } @@ -186,7 +184,7 @@ test_ht_iterator(void) { isc_result_t result; isc_ht_iter_t *iter = NULL; uintptr_t i; - uintptr_t count = 6300; + uintptr_t count = 7600; uint32_t walked; unsigned char key[16]; size_t tksize; @@ -296,22 +294,34 @@ test_ht_iterator(void) { assert_null(ht); } +/* 1 bit, 120 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_ht_1_120) { + test_ht_full(1, 120); + return; +} + +/* 6 bit, 1000 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_ht_6_1000) { + test_ht_full(6, 1000); + return; +} + /* 24 bit, 200K elements test, no rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_24) { +ISC_RUN_TEST_IMPL(isc_ht_24_200000) { UNUSED(state); - test_ht_full(24, 24, 200000); + test_ht_full(24, 200000); } /* 15 bit, 45K elements test, full rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_15) { +ISC_RUN_TEST_IMPL(isc_ht_1_48000) { UNUSED(state); - test_ht_full(1, 15, 48000); + test_ht_full(1, 48000); } /* 8 bit, 20k elements test, partial rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_8) { +ISC_RUN_TEST_IMPL(isc_ht_8_20000) { UNUSED(state); - test_ht_full(8, 14, 20000); + test_ht_full(8, 20000); } /* test hashtable iterator */ @@ -322,9 +332,11 @@ ISC_RUN_TEST_IMPL(isc_ht_iterator) { } ISC_TEST_LIST_START -ISC_TEST_ENTRY(isc_ht_24) -ISC_TEST_ENTRY(isc_ht_15) -ISC_TEST_ENTRY(isc_ht_8) +ISC_TEST_ENTRY(isc_ht_1_120) +ISC_TEST_ENTRY(isc_ht_6_1000) +ISC_TEST_ENTRY(isc_ht_24_200000) +ISC_TEST_ENTRY(isc_ht_1_48000) +ISC_TEST_ENTRY(isc_ht_8_20000) ISC_TEST_ENTRY(isc_ht_iterator) ISC_TEST_LIST_END