From c0233a35da819d5f4f7dc4b7f0ee040472f17127 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Mon, 12 Aug 2024 14:06:12 +0200 Subject: [PATCH] hash: provide asserts to verify API use - converted the Curl_hash_count() macro to a function - Discourage accessing struct fields directly - Document the internal API in HASH.md Closes #14503 --- docs/HASH.md | 188 ++++++++++++++++++++++++++++++++++++++++++ docs/Makefile.am | 1 + lib/conncache.c | 5 +- lib/hash.c | 27 +++++- lib/hash.h | 12 ++- lib/hostip.c | 3 +- lib/multi.c | 17 ++-- lib/share.c | 7 +- tests/unit/unit1609.c | 1 - 9 files changed, 246 insertions(+), 15 deletions(-) create mode 100644 docs/HASH.md diff --git a/docs/HASH.md b/docs/HASH.md new file mode 100644 index 0000000000..5a5bdc9838 --- /dev/null +++ b/docs/HASH.md @@ -0,0 +1,188 @@ + + +# `hash` + + #include "hash.h" + +This is the internal module for doing hash tables. A hash table uses a hash +function to compute an index. On each index there is a separate linked list of +entries. + +Create a hash table. Add items. Retrieve items. Remove items. Destroy table. + +## `Curl_hash_init` + +~~~c +void Curl_hash_init(struct Curl_hash *h, + size_t slots, + hash_function hfunc, + comp_function comparator, + Curl_hash_dtor dtor); +~~~ + +The call initializes a `struct Curl_hash`. + +- `slots` is the number of entries to create in the hash table. Larger is + better (faster lookups) but also uses more memory. +- `hfunc` is a function pointer to a function that returns a `size_t` value as + a checksum for an entry in this hash table. Ideally, it returns a unique + value for every entry ever added to the hash table, but hash collisions are + handled. +- `comparator` is a function pointer to a function that compares two hash + table entries. It should return non-zero if the compared items are + identical. +- `dtor` is a function pointer to a destructor called when an entry is removed + from the table + +## `Curl_hash_add` + +~~~c +void * +Curl_hash_add(struct Curl_hash *h, void *key, size_t key_len, void *p) +~~~ + +This call adds an entry to the hash. `key` points to the hash key and +`key_len` is the length of the hash key. `p` is a custom pointer. + +If there already was a match in the hash, that data is replaced with this new +entry. + +This function also lazily allocates the table if needed, as it is not done in +the `Curl_hash_init` function. + +Returns NULL on error, otherwise it returns a pointer to `p`. + +## `Curl_hash_add2` + +~~~c +void *Curl_hash_add2(struct Curl_hash *h, void *key, size_t key_len, void *p, + Curl_hash_elem_dtor dtor) +~~~ + +This works like `Curl_hash_add` but has an extra argument: `dtor`, which is a +destructor call for this specific entry. When this entry is removed, this +function is called instead of the function stored for the whole hash table. + +## `Curl_hash_delete` + +~~~c +int Curl_hash_delete(struct Curl_hash *h, void *key, size_t key_len); +~~~ + +This function removes an entry from the hash table. If successful, it returns +zero. If the entry was not found, it returns 1. + +## `Curl_hash_pick` + +~~~c +void *Curl_hash_pick(struct Curl_hash *h, void *key, size_t key_len); +~~~ + +If there is an entry in the hash that matches the given `key` with size of +`key_len`, that its custom pointer is returned. The pointer that was called +`p` when the entry was added. + +It returns NULL if there is no matching entry in the hash. + +## `Curl_hash_destroy` + +~~~c +void Curl_hash_destroy(struct Curl_hash *h); +~~~ + +This function destroys a hash and cleanups up all its related data. Calling it +multiple times is fine. + +## `Curl_hash_clean` + +~~~c +void Curl_hash_clean(struct Curl_hash *h); +~~~ + +This function removes all the entries in the given hash. + +## `Curl_hash_clean_with_criterium` + +~~~c +void +Curl_hash_clean_with_criterium(struct Curl_hash *h, void *user, + int (*comp)(void *, void *)) +~~~ + +This function removes all the entries in the given hash that matches the +criterion. The provided `comp` function determines if the criteria is met by +returning non-zero. + +## `Curl_hash_count` + +~~~c +size_t Curl_hash_count(struct Curl_hash *h) +~~~ + +Returns the number of entries stored in the hash. + +## `Curl_hash_start_iterate` + +~~~c +void Curl_hash_start_iterate(struct Curl_hash *hash, + struct Curl_hash_iterator *iter): +~~~ + +This function initializes a `struct Curl_hash_iterator` that `iter` points to. +It can then be used to iterate over all the entries in the hash. + +## `Curl_hash_next_element` + +~~~c +struct Curl_hash_element * +Curl_hash_next_element(struct Curl_hash_iterator *iter); +~~~ + +Given the iterator `iter`, this function returns a pointer to the next hash +entry if there is one, or NULL if there is no more entries. + +Called repeatedly, it iterates over all the entries in the hash table. + +Note: it only guarantees functionality if the hash table remains untouched +during its iteration. + +# `curl_off_t` dedicated hash functions + +## `Curl_hash_offt_init` + +~~~c +void Curl_hash_offt_init(struct Curl_hash *h, + size_t slots, + Curl_hash_dtor dtor); +~~~ + +Initializes a hash table for `curl_off_t` values. Pass in desired number of +`slots` and `dtor` function. + +## `Curl_hash_offt_set` + +~~~c +void *Curl_hash_offt_set(struct Curl_hash *h, curl_off_t id, void *elem); +~~~ + +Associate a custom `elem` pointer with the given `id`. + +## `Curl_hash_offt_remove` + +~~~c +int Curl_hash_offt_remove(struct Curl_hash *h, curl_off_t id); +~~~ + +Remove the `id` from the hash. + +## `Curl_hash_offt_get` + +~~~c +void *Curl_hash_offt_get(struct Curl_hash *h, curl_off_t id); +~~~ + +Get the pointer associated with the specified `id`. diff --git a/docs/Makefile.am b/docs/Makefile.am index f37e0d629d..7f9dbc3bdd 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -70,6 +70,7 @@ EXTRA_DIST = \ FAQ \ FEATURES.md \ GOVERNANCE.md \ + HASH.md \ HELP-US.md \ HISTORY.md \ HSTS.md \ diff --git a/lib/conncache.c b/lib/conncache.c index e0a15cf32c..3591b4f8ef 100644 --- a/lib/conncache.c +++ b/lib/conncache.c @@ -123,6 +123,9 @@ static void free_bundle_hash_entry(void *freethis) int Curl_conncache_init(struct conncache *connc, struct Curl_multi *multi, size_t size) { + Curl_hash_init(&connc->hash, size, Curl_hash_str, + Curl_str_key_compare, free_bundle_hash_entry); + /* allocate a new easy handle to use when closing cached connections */ connc->closure_handle = curl_easy_init(); if(!connc->closure_handle) @@ -133,8 +136,6 @@ int Curl_conncache_init(struct conncache *connc, connc->closure_handle->set.verbose = true; #endif - Curl_hash_init(&connc->hash, size, Curl_hash_str, - Curl_str_key_compare, free_bundle_hash_entry); connc->closure_handle->state.conn_cache = connc; connc->multi = multi; Curl_llist_init(&connc->shutdowns.conn_list, NULL); diff --git a/lib/hash.c b/lib/hash.c index be36704eb2..1910ac5dc4 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -33,6 +33,10 @@ /* The last #include file should be: */ #include "memdebug.h" +/* random patterns for API verification */ +#define HASHINIT 0x7017e781 +#define ITERINIT 0x5FEDCBA9 + static void hash_element_dtor(void *user, void *element) { @@ -77,6 +81,9 @@ Curl_hash_init(struct Curl_hash *h, h->dtor = dtor; h->size = 0; h->slots = slots; +#ifdef DEBUGBUILD + h->init = HASHINIT; +#endif } static struct Curl_hash_element * @@ -107,6 +114,7 @@ void *Curl_hash_add2(struct Curl_hash *h, void *key, size_t key_len, void *p, DEBUGASSERT(h); DEBUGASSERT(h->slots); + DEBUGASSERT(h->init == HASHINIT); if(!h->table) { size_t i; h->table = malloc(h->slots * sizeof(struct Curl_llist)); @@ -160,6 +168,7 @@ int Curl_hash_delete(struct Curl_hash *h, void *key, size_t key_len) { DEBUGASSERT(h); DEBUGASSERT(h->slots); + DEBUGASSERT(h->init == HASHINIT); if(h->table) { struct Curl_llist_node *le; struct Curl_llist *l = FETCH_LIST(h, key, key_len); @@ -184,6 +193,7 @@ void * Curl_hash_pick(struct Curl_hash *h, void *key, size_t key_len) { DEBUGASSERT(h); + DEBUGASSERT(h->init == HASHINIT); if(h->table) { struct Curl_llist_node *le; struct Curl_llist *l; @@ -210,6 +220,7 @@ Curl_hash_pick(struct Curl_hash *h, void *key, size_t key_len) void Curl_hash_destroy(struct Curl_hash *h) { + DEBUGASSERT(h->init == HASHINIT); if(h->table) { size_t i; for(i = 0; i < h->slots; ++i) { @@ -231,6 +242,12 @@ Curl_hash_clean(struct Curl_hash *h) Curl_hash_clean_with_criterium(h, NULL, NULL); } +size_t Curl_hash_count(struct Curl_hash *h) +{ + DEBUGASSERT(h->init == HASHINIT); + return h->size; +} + /* Cleans all entries that pass the comp function criteria. */ void Curl_hash_clean_with_criterium(struct Curl_hash *h, void *user, @@ -241,6 +258,7 @@ Curl_hash_clean_with_criterium(struct Curl_hash *h, void *user, if(!h || !h->table) return; + DEBUGASSERT(h->init == HASHINIT); for(i = 0; i < h->slots; ++i) { struct Curl_llist *list = &h->table[i]; struct Curl_llist_node *le = @@ -285,16 +303,21 @@ size_t Curl_str_key_compare(void *k1, size_t key1_len, void Curl_hash_start_iterate(struct Curl_hash *hash, struct Curl_hash_iterator *iter) { + DEBUGASSERT(hash->init == HASHINIT); iter->hash = hash; iter->slot_index = 0; iter->current_element = NULL; +#ifdef DEBUGBUILD + iter->init = ITERINIT; +#endif } struct Curl_hash_element * Curl_hash_next_element(struct Curl_hash_iterator *iter) { - struct Curl_hash *h = iter->hash; - + struct Curl_hash *h; + DEBUGASSERT(iter->init == ITERINIT); + h = iter->hash; if(!h->table) return NULL; /* empty hash, nothing to return */ diff --git a/lib/hash.h b/lib/hash.h index b0bc29be39..b160395024 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -56,6 +56,9 @@ struct Curl_hash { Curl_hash_dtor dtor; size_t slots; size_t size; +#ifdef DEBUGBUILD + int init; +#endif }; typedef void (*Curl_hash_elem_dtor)(void *key, size_t key_len, void *p); @@ -65,6 +68,9 @@ struct Curl_hash_element { void *ptr; Curl_hash_elem_dtor dtor; size_t key_len; +#ifdef DEBUGBUILD + int init; +#endif char key[1]; /* allocated memory following the struct */ }; @@ -72,6 +78,9 @@ struct Curl_hash_iterator { struct Curl_hash *hash; size_t slot_index; struct Curl_llist_node *current_element; +#ifdef DEBUGBUILD + int init; +#endif }; void Curl_hash_init(struct Curl_hash *h, @@ -85,8 +94,9 @@ void *Curl_hash_add2(struct Curl_hash *h, void *key, size_t key_len, void *p, Curl_hash_elem_dtor dtor); int Curl_hash_delete(struct Curl_hash *h, void *key, size_t key_len); void *Curl_hash_pick(struct Curl_hash *, void *key, size_t key_len); -#define Curl_hash_count(h) ((h)->size) + void Curl_hash_destroy(struct Curl_hash *h); +size_t Curl_hash_count(struct Curl_hash *h); void Curl_hash_clean(struct Curl_hash *h); void Curl_hash_clean_with_criterium(struct Curl_hash *h, void *user, int (*comp)(void *, void *)); diff --git a/lib/hostip.c b/lib/hostip.c index a4de378bcc..e37d009daa 100644 --- a/lib/hostip.c +++ b/lib/hostip.c @@ -257,7 +257,8 @@ void Curl_hostcache_prune(struct Curl_easy *data) /* if the cache size is still too big, use the oldest age as new prune limit */ - } while(timeout && (data->dns.hostcache->size > MAX_DNS_CACHE_SIZE)); + } while(timeout && + (Curl_hash_count(data->dns.hostcache) > MAX_DNS_CACHE_SIZE)); if(data->share) Curl_share_unlock(data, CURL_LOCK_DATA_DNS); diff --git a/lib/multi.c b/lib/multi.c index 8732837f3b..2e62e56b62 100644 --- a/lib/multi.c +++ b/lib/multi.c @@ -247,10 +247,8 @@ static size_t trhash(void *key, size_t key_length, size_t slots_num) static size_t trhash_compare(void *k1, size_t k1_len, void *k2, size_t k2_len) { - (void)k1_len; (void)k2_len; - - return *(struct Curl_easy **)k1 == *(struct Curl_easy **)k2; + return !memcmp(k1, k2, k1_len); } static void trhash_dtor(void *nada) @@ -2929,18 +2927,24 @@ CURLMcode Curl_multi_pollset_ev(struct Curl_multi *multi, } if(last_action && (last_action != cur_action)) { /* Socket was used already, but different action now */ - if(last_action & CURL_POLL_IN) + if(last_action & CURL_POLL_IN) { + DEBUGASSERT(entry->readers); entry->readers--; - if(last_action & CURL_POLL_OUT) + } + if(last_action & CURL_POLL_OUT) { + DEBUGASSERT(entry->writers); entry->writers--; - if(cur_action & CURL_POLL_IN) + } + if(cur_action & CURL_POLL_IN) { entry->readers++; + } if(cur_action & CURL_POLL_OUT) entry->writers++; } else if(!last_action && !Curl_hash_pick(&entry->transfers, (char *)&data, /* hash key */ sizeof(struct Curl_easy *))) { + DEBUGASSERT(entry->users < 100000); /* detect weird values */ /* a new transfer using this socket */ entry->users++; if(cur_action & CURL_POLL_IN) @@ -3002,6 +3006,7 @@ CURLMcode Curl_multi_pollset_ev(struct Curl_multi *multi, if(entry) { unsigned char oldactions = last_ps->actions[i]; /* this socket has been removed. Decrease user count */ + DEBUGASSERT(entry->users); entry->users--; if(oldactions & CURL_POLL_OUT) entry->writers--; diff --git a/lib/share.c b/lib/share.c index 164aceb874..3d0c2f5e45 100644 --- a/lib/share.c +++ b/lib/share.c @@ -224,8 +224,11 @@ curl_share_cleanup(struct Curl_share *share) return CURLSHE_IN_USE; } - Curl_conncache_close_all_connections(&share->conn_cache); - Curl_conncache_destroy(&share->conn_cache); + if(share->specifier & (1 << CURL_LOCK_DATA_CONNECT)) { + /* avoid the hash if it was never initialized */ + Curl_conncache_close_all_connections(&share->conn_cache); + Curl_conncache_destroy(&share->conn_cache); + } Curl_hash_destroy(&share->hostcache); #if !defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_COOKIES) diff --git a/tests/unit/unit1609.c b/tests/unit/unit1609.c index 19c59ada3e..c902243f59 100644 --- a/tests/unit/unit1609.c +++ b/tests/unit/unit1609.c @@ -200,7 +200,6 @@ UNITTEST_START curl_easy_cleanup(easy); easy = NULL; - Curl_hash_destroy(&multi->hostcache); curl_multi_cleanup(multi); multi = NULL; curl_slist_free_all(list); -- 2.47.3