1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #if HAVE_VALGRIND_VALGRIND_H
9 # include <valgrind/valgrind.h>
12 #include "alloc-util.h"
15 #include "logarithm.h"
17 #include "memory-util.h"
19 #include "missing_syscall.h"
20 #include "process-util.h"
21 #include "random-util.h"
23 #include "siphash24.h"
24 #include "sort-util.h"
25 #include "string-util.h"
28 #if ENABLE_DEBUG_HASHMAP
33 * Implementation of hashmaps.
35 * - uses less RAM compared to closed addressing (chaining), because
36 * our entries are small (especially in Sets, which tend to contain
37 * the majority of entries in systemd).
38 * Collision resolution: Robin Hood
39 * - tends to equalize displacement of entries from their optimal buckets.
40 * Probe sequence: linear
41 * - though theoretically worse than random probing/uniform hashing/double
42 * hashing, it is good for cache locality.
45 * Celis, P. 1986. Robin Hood Hashing.
46 * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada.
47 * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
48 * - The results are derived for random probing. Suggests deletion with
49 * tombstones and two mean-centered search methods. None of that works
50 * well for linear probing.
52 * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
53 * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
54 * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
55 * http://www.math.uu.se/~svante/papers/sj157.pdf
56 * - Applies to Robin Hood with linear probing. Contains remarks on
57 * the unsuitability of mean-centered search with linear probing.
59 * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
60 * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
61 * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
62 * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
63 * in a successful search), and Janson writes about displacement. C = d + 1.
65 * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
66 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
67 * - Explanation of backward shift deletion with pictures.
69 * Khuong, P. 2013. The Other Robin Hood Hashing.
70 * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
71 * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
75 * XXX Ideas for improvement:
76 * For unordered hashmaps, randomize iteration order, similarly to Perl:
77 * http://blog.booking.com/hardening-perls-hash-function.html
80 /* INV_KEEP_FREE = 1 / (1 - max_load_factor)
81 * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
82 #define INV_KEEP_FREE 5U
84 /* Fields common to entries of all hashmap/set types */
85 struct hashmap_base_entry
{
89 /* Entry types for specific hashmap/set types
90 * hashmap_base_entry must be at the beginning of each entry struct. */
92 struct plain_hashmap_entry
{
93 struct hashmap_base_entry b
;
97 struct ordered_hashmap_entry
{
98 struct plain_hashmap_entry p
;
99 unsigned iterate_next
, iterate_previous
;
103 struct hashmap_base_entry b
;
106 /* In several functions it is advantageous to have the hash table extended
107 * virtually by a couple of additional buckets. We reserve special index values
108 * for these "swap" buckets. */
109 #define _IDX_SWAP_BEGIN (UINT_MAX - 3)
110 #define IDX_PUT (_IDX_SWAP_BEGIN + 0)
111 #define IDX_TMP (_IDX_SWAP_BEGIN + 1)
112 #define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
114 #define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
115 #define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
117 assert_cc(IDX_FIRST
== _IDX_SWAP_END
);
118 assert_cc(IDX_FIRST
== _IDX_ITERATOR_FIRST
);
120 /* Storage space for the "swap" buckets.
121 * All entry types can fit into an ordered_hashmap_entry. */
122 struct swap_entries
{
123 struct ordered_hashmap_entry e
[_IDX_SWAP_END
- _IDX_SWAP_BEGIN
];
126 /* Distance from Initial Bucket */
127 typedef uint8_t dib_raw_t
;
128 #define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
129 #define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
130 #define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
131 #define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
133 #define DIB_FREE UINT_MAX
135 #if ENABLE_DEBUG_HASHMAP
136 struct hashmap_debug_info
{
137 LIST_FIELDS(struct hashmap_debug_info
, debug_list
);
138 unsigned max_entries
; /* high watermark of n_entries */
140 /* who allocated this hashmap */
145 /* fields to detect modification while iterating */
146 unsigned put_count
; /* counts puts into the hashmap */
147 unsigned rem_count
; /* counts removals from hashmap */
148 unsigned last_rem_idx
; /* remembers last removal index */
151 /* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
152 static LIST_HEAD(struct hashmap_debug_info
, hashmap_debug_list
);
153 static pthread_mutex_t hashmap_debug_list_mutex
= PTHREAD_MUTEX_INITIALIZER
;
158 HASHMAP_TYPE_ORDERED
,
163 struct _packed_ indirect_storage
{
164 void *storage
; /* where buckets and DIBs are stored */
165 uint8_t hash_key
[HASH_KEY_SIZE
]; /* hash key; changes during resize */
167 unsigned n_entries
; /* number of stored entries */
168 unsigned n_buckets
; /* number of buckets */
170 unsigned idx_lowest_entry
; /* Index below which all buckets are free.
171 Makes "while (hashmap_steal_first())" loops
172 O(n) instead of O(n^2) for unordered hashmaps. */
173 uint8_t _pad
[3]; /* padding for the whole HashmapBase */
174 /* The bitfields in HashmapBase complete the alignment of the whole thing. */
177 struct direct_storage
{
178 /* This gives us 39 bytes on 64-bit, or 35 bytes on 32-bit.
179 * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64-bit,
180 * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32-bit. */
181 uint8_t storage
[sizeof(struct indirect_storage
)];
184 #define DIRECT_BUCKETS(entry_t) \
185 (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
187 /* We should be able to store at least one entry directly. */
188 assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry
) >= 1);
190 /* We have 3 bits for n_direct_entries. */
191 assert_cc(DIRECT_BUCKETS(struct set_entry
) < (1 << 3));
193 /* Hashmaps with directly stored entries all use this shared hash key.
194 * It's no big deal if the key is guessed, because there can be only
195 * a handful of directly stored entries in a hashmap. When a hashmap
196 * outgrows direct storage, it gets its own key for indirect storage. */
197 static uint8_t shared_hash_key
[HASH_KEY_SIZE
];
199 /* Fields that all hashmap/set types must have */
201 const struct hash_ops
*hash_ops
; /* hash and compare ops to use */
204 struct indirect_storage indirect
; /* if has_indirect */
205 struct direct_storage direct
; /* if !has_indirect */
208 enum HashmapType type
:2; /* HASHMAP_TYPE_* */
209 bool has_indirect
:1; /* whether indirect storage is used */
210 unsigned n_direct_entries
:3; /* Number of entries in direct storage.
211 * Only valid if !has_indirect. */
212 bool from_pool
:1; /* whether was allocated from mempool */
213 bool dirty
:1; /* whether dirtied since last iterated_cache_get() */
214 bool cached
:1; /* whether this hashmap is being cached */
216 #if ENABLE_DEBUG_HASHMAP
217 struct hashmap_debug_info debug
;
221 /* Specific hash types
222 * HashmapBase must be at the beginning of each hashmap struct. */
225 struct HashmapBase b
;
228 struct OrderedHashmap
{
229 struct HashmapBase b
;
230 unsigned iterate_list_head
, iterate_list_tail
;
234 struct HashmapBase b
;
237 typedef struct CacheMem
{
243 struct IteratedCache
{
244 HashmapBase
*hashmap
;
245 CacheMem keys
, values
;
248 DEFINE_MEMPOOL(hashmap_pool
, Hashmap
, 8);
249 DEFINE_MEMPOOL(ordered_hashmap_pool
, OrderedHashmap
, 8);
250 /* No need for a separate Set pool */
251 assert_cc(sizeof(Hashmap
) == sizeof(Set
));
253 struct hashmap_type_info
{
256 struct mempool
*mempool
;
257 unsigned n_direct_buckets
;
260 static _used_
const struct hashmap_type_info hashmap_type_info
[_HASHMAP_TYPE_MAX
] = {
261 [HASHMAP_TYPE_PLAIN
] = {
262 .head_size
= sizeof(Hashmap
),
263 .entry_size
= sizeof(struct plain_hashmap_entry
),
264 .mempool
= &hashmap_pool
,
265 .n_direct_buckets
= DIRECT_BUCKETS(struct plain_hashmap_entry
),
267 [HASHMAP_TYPE_ORDERED
] = {
268 .head_size
= sizeof(OrderedHashmap
),
269 .entry_size
= sizeof(struct ordered_hashmap_entry
),
270 .mempool
= &ordered_hashmap_pool
,
271 .n_direct_buckets
= DIRECT_BUCKETS(struct ordered_hashmap_entry
),
273 [HASHMAP_TYPE_SET
] = {
274 .head_size
= sizeof(Set
),
275 .entry_size
= sizeof(struct set_entry
),
276 .mempool
= &hashmap_pool
,
277 .n_direct_buckets
= DIRECT_BUCKETS(struct set_entry
),
281 void hashmap_trim_pools(void) {
284 /* The pool is only allocated by the main thread, but the memory can be passed to other
285 * threads. Let's clean up if we are the main thread and no other threads are live. */
287 /* We build our own is_main_thread() here, which doesn't use C11 TLS based caching of the
288 * result. That's because valgrind apparently doesn't like TLS to be used from a GCC destructor. */
289 if (getpid() != gettid())
290 return (void) log_debug("Not cleaning up memory pools, not in main thread.");
292 r
= get_process_threads(0);
294 return (void) log_debug_errno(r
, "Failed to determine number of threads, not cleaning up memory pools: %m");
296 return (void) log_debug("Not cleaning up memory pools, running in multi-threaded process.");
298 mempool_trim(&hashmap_pool
);
299 mempool_trim(&ordered_hashmap_pool
);
302 #if HAVE_VALGRIND_VALGRIND_H
303 _destructor_
static void cleanup_pools(void) {
304 /* Be nice to valgrind */
305 if (RUNNING_ON_VALGRIND
)
306 hashmap_trim_pools();
310 static unsigned n_buckets(HashmapBase
*h
) {
311 return h
->has_indirect
? h
->indirect
.n_buckets
312 : hashmap_type_info
[h
->type
].n_direct_buckets
;
315 static unsigned n_entries(HashmapBase
*h
) {
316 return h
->has_indirect
? h
->indirect
.n_entries
317 : h
->n_direct_entries
;
320 static void n_entries_inc(HashmapBase
*h
) {
322 h
->indirect
.n_entries
++;
324 h
->n_direct_entries
++;
327 static void n_entries_dec(HashmapBase
*h
) {
329 h
->indirect
.n_entries
--;
331 h
->n_direct_entries
--;
334 static void* storage_ptr(HashmapBase
*h
) {
335 return h
->has_indirect
? h
->indirect
.storage
339 static uint8_t* hash_key(HashmapBase
*h
) {
340 return h
->has_indirect
? h
->indirect
.hash_key
344 static unsigned base_bucket_hash(HashmapBase
*h
, const void *p
) {
345 struct siphash state
;
348 siphash24_init(&state
, hash_key(h
));
350 h
->hash_ops
->hash(p
, &state
);
352 hash
= siphash24_finalize(&state
);
354 return (unsigned) (hash
% n_buckets(h
));
356 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
358 static void base_set_dirty(HashmapBase
*h
) {
361 #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
363 static void get_hash_key(uint8_t hash_key
[HASH_KEY_SIZE
], bool reuse_is_ok
) {
364 static uint8_t current
[HASH_KEY_SIZE
];
365 static bool current_initialized
= false;
367 /* Returns a hash function key to use. In order to keep things
368 * fast we will not generate a new key each time we allocate a
369 * new hash table. Instead, we'll just reuse the most recently
370 * generated one, except if we never generated one or when we
371 * are rehashing an entire hash table because we reached a
374 if (!current_initialized
|| !reuse_is_ok
) {
375 random_bytes(current
, sizeof(current
));
376 current_initialized
= true;
379 memcpy(hash_key
, current
, sizeof(current
));
382 static struct hashmap_base_entry
* bucket_at(HashmapBase
*h
, unsigned idx
) {
383 return CAST_ALIGN_PTR(
384 struct hashmap_base_entry
,
385 (uint8_t *) storage_ptr(h
) + idx
* hashmap_type_info
[h
->type
].entry_size
);
388 static struct plain_hashmap_entry
* plain_bucket_at(Hashmap
*h
, unsigned idx
) {
389 return (struct plain_hashmap_entry
*) bucket_at(HASHMAP_BASE(h
), idx
);
392 static struct ordered_hashmap_entry
* ordered_bucket_at(OrderedHashmap
*h
, unsigned idx
) {
393 return (struct ordered_hashmap_entry
*) bucket_at(HASHMAP_BASE(h
), idx
);
396 static struct set_entry
*set_bucket_at(Set
*h
, unsigned idx
) {
397 return (struct set_entry
*) bucket_at(HASHMAP_BASE(h
), idx
);
400 static struct ordered_hashmap_entry
* bucket_at_swap(struct swap_entries
*swap
, unsigned idx
) {
401 return &swap
->e
[idx
- _IDX_SWAP_BEGIN
];
404 /* Returns a pointer to the bucket at index idx.
405 * Understands real indexes and swap indexes, hence "_virtual". */
406 static struct hashmap_base_entry
* bucket_at_virtual(HashmapBase
*h
, struct swap_entries
*swap
,
408 if (idx
< _IDX_SWAP_BEGIN
)
409 return bucket_at(h
, idx
);
411 if (idx
< _IDX_SWAP_END
)
412 return &bucket_at_swap(swap
, idx
)->p
.b
;
414 assert_not_reached();
417 static dib_raw_t
* dib_raw_ptr(HashmapBase
*h
) {
419 ((uint8_t*) storage_ptr(h
) + hashmap_type_info
[h
->type
].entry_size
* n_buckets(h
));
422 static unsigned bucket_distance(HashmapBase
*h
, unsigned idx
, unsigned from
) {
423 return idx
>= from
? idx
- from
424 : n_buckets(h
) + idx
- from
;
427 static unsigned bucket_calculate_dib(HashmapBase
*h
, unsigned idx
, dib_raw_t raw_dib
) {
428 unsigned initial_bucket
;
430 if (raw_dib
== DIB_RAW_FREE
)
433 if (_likely_(raw_dib
< DIB_RAW_OVERFLOW
))
437 * Having an overflow DIB value is very unlikely. The hash function
438 * would have to be bad. For example, in a table of size 2^24 filled
439 * to load factor 0.9 the maximum observed DIB is only about 60.
440 * In theory (assuming I used Maxima correctly), for an infinite size
441 * hash table with load factor 0.8 the probability of a given entry
442 * having DIB > 40 is 1.9e-8.
443 * This returns the correct DIB value by recomputing the hash value in
444 * the unlikely case. XXX Hitting this case could be a hint to rehash.
446 initial_bucket
= bucket_hash(h
, bucket_at(h
, idx
)->key
);
447 return bucket_distance(h
, idx
, initial_bucket
);
450 static void bucket_set_dib(HashmapBase
*h
, unsigned idx
, unsigned dib
) {
451 dib_raw_ptr(h
)[idx
] = dib
!= DIB_FREE
? MIN(dib
, DIB_RAW_OVERFLOW
) : DIB_RAW_FREE
;
454 static unsigned skip_free_buckets(HashmapBase
*h
, unsigned idx
) {
457 dibs
= dib_raw_ptr(h
);
459 for ( ; idx
< n_buckets(h
); idx
++)
460 if (dibs
[idx
] != DIB_RAW_FREE
)
466 static void bucket_mark_free(HashmapBase
*h
, unsigned idx
) {
467 memzero(bucket_at(h
, idx
), hashmap_type_info
[h
->type
].entry_size
);
468 bucket_set_dib(h
, idx
, DIB_FREE
);
471 static void bucket_move_entry(HashmapBase
*h
, struct swap_entries
*swap
,
472 unsigned from
, unsigned to
) {
473 struct hashmap_base_entry
*e_from
, *e_to
;
477 e_from
= bucket_at_virtual(h
, swap
, from
);
478 e_to
= bucket_at_virtual(h
, swap
, to
);
480 memcpy(e_to
, e_from
, hashmap_type_info
[h
->type
].entry_size
);
482 if (h
->type
== HASHMAP_TYPE_ORDERED
) {
483 OrderedHashmap
*lh
= (OrderedHashmap
*) h
;
484 struct ordered_hashmap_entry
*le
, *le_to
;
486 le_to
= (struct ordered_hashmap_entry
*) e_to
;
488 if (le_to
->iterate_next
!= IDX_NIL
) {
489 le
= (struct ordered_hashmap_entry
*)
490 bucket_at_virtual(h
, swap
, le_to
->iterate_next
);
491 le
->iterate_previous
= to
;
494 if (le_to
->iterate_previous
!= IDX_NIL
) {
495 le
= (struct ordered_hashmap_entry
*)
496 bucket_at_virtual(h
, swap
, le_to
->iterate_previous
);
497 le
->iterate_next
= to
;
500 if (lh
->iterate_list_head
== from
)
501 lh
->iterate_list_head
= to
;
502 if (lh
->iterate_list_tail
== from
)
503 lh
->iterate_list_tail
= to
;
507 static unsigned next_idx(HashmapBase
*h
, unsigned idx
) {
508 return (idx
+ 1U) % n_buckets(h
);
511 static unsigned prev_idx(HashmapBase
*h
, unsigned idx
) {
512 return (n_buckets(h
) + idx
- 1U) % n_buckets(h
);
515 static void* entry_value(HashmapBase
*h
, struct hashmap_base_entry
*e
) {
518 case HASHMAP_TYPE_PLAIN
:
519 case HASHMAP_TYPE_ORDERED
:
520 return ((struct plain_hashmap_entry
*)e
)->value
;
522 case HASHMAP_TYPE_SET
:
523 return (void*) e
->key
;
526 assert_not_reached();
530 static void base_remove_entry(HashmapBase
*h
, unsigned idx
) {
531 unsigned left
, right
, prev
, dib
;
532 dib_raw_t raw_dib
, *dibs
;
534 dibs
= dib_raw_ptr(h
);
535 assert(dibs
[idx
] != DIB_RAW_FREE
);
537 #if ENABLE_DEBUG_HASHMAP
538 h
->debug
.rem_count
++;
539 h
->debug
.last_rem_idx
= idx
;
543 /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
544 for (right
= next_idx(h
, left
); ; right
= next_idx(h
, right
)) {
545 raw_dib
= dibs
[right
];
546 if (IN_SET(raw_dib
, 0, DIB_RAW_FREE
))
549 /* The buckets are not supposed to be all occupied and with DIB > 0.
550 * That would mean we could make everyone better off by shifting them
551 * backward. This scenario is impossible. */
552 assert(left
!= right
);
555 if (h
->type
== HASHMAP_TYPE_ORDERED
) {
556 OrderedHashmap
*lh
= (OrderedHashmap
*) h
;
557 struct ordered_hashmap_entry
*le
= ordered_bucket_at(lh
, idx
);
559 if (le
->iterate_next
!= IDX_NIL
)
560 ordered_bucket_at(lh
, le
->iterate_next
)->iterate_previous
= le
->iterate_previous
;
562 lh
->iterate_list_tail
= le
->iterate_previous
;
564 if (le
->iterate_previous
!= IDX_NIL
)
565 ordered_bucket_at(lh
, le
->iterate_previous
)->iterate_next
= le
->iterate_next
;
567 lh
->iterate_list_head
= le
->iterate_next
;
570 /* Now shift all buckets in the interval (left, right) one step backwards */
571 for (prev
= left
, left
= next_idx(h
, left
); left
!= right
;
572 prev
= left
, left
= next_idx(h
, left
)) {
573 dib
= bucket_calculate_dib(h
, left
, dibs
[left
]);
575 bucket_move_entry(h
, NULL
, left
, prev
);
576 bucket_set_dib(h
, prev
, dib
- 1);
579 bucket_mark_free(h
, prev
);
583 #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
585 static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap
*h
, Iterator
*i
) {
586 struct ordered_hashmap_entry
*e
;
592 if (i
->idx
== IDX_NIL
)
595 if (i
->idx
== IDX_FIRST
&& h
->iterate_list_head
== IDX_NIL
)
598 if (i
->idx
== IDX_FIRST
) {
599 idx
= h
->iterate_list_head
;
600 e
= ordered_bucket_at(h
, idx
);
603 e
= ordered_bucket_at(h
, idx
);
605 * We allow removing the current entry while iterating, but removal may cause
606 * a backward shift. The next entry may thus move one bucket to the left.
607 * To detect when it happens, we remember the key pointer of the entry we were
608 * going to iterate next. If it does not match, there was a backward shift.
610 if (e
->p
.b
.key
!= i
->next_key
) {
611 idx
= prev_idx(HASHMAP_BASE(h
), idx
);
612 e
= ordered_bucket_at(h
, idx
);
614 assert(e
->p
.b
.key
== i
->next_key
);
617 #if ENABLE_DEBUG_HASHMAP
621 if (e
->iterate_next
!= IDX_NIL
) {
622 struct ordered_hashmap_entry
*n
;
623 i
->idx
= e
->iterate_next
;
624 n
= ordered_bucket_at(h
, i
->idx
);
625 i
->next_key
= n
->p
.b
.key
;
636 static unsigned hashmap_iterate_in_internal_order(HashmapBase
*h
, Iterator
*i
) {
642 if (i
->idx
== IDX_NIL
)
645 if (i
->idx
== IDX_FIRST
) {
646 /* fast forward to the first occupied bucket */
647 if (h
->has_indirect
) {
648 i
->idx
= skip_free_buckets(h
, h
->indirect
.idx_lowest_entry
);
649 h
->indirect
.idx_lowest_entry
= i
->idx
;
651 i
->idx
= skip_free_buckets(h
, 0);
653 if (i
->idx
== IDX_NIL
)
656 struct hashmap_base_entry
*e
;
660 e
= bucket_at(h
, i
->idx
);
662 * We allow removing the current entry while iterating, but removal may cause
663 * a backward shift. The next entry may thus move one bucket to the left.
664 * To detect when it happens, we remember the key pointer of the entry we were
665 * going to iterate next. If it does not match, there was a backward shift.
667 if (e
->key
!= i
->next_key
)
668 e
= bucket_at(h
, --i
->idx
);
670 assert(e
->key
== i
->next_key
);
674 #if ENABLE_DEBUG_HASHMAP
678 i
->idx
= skip_free_buckets(h
, i
->idx
+ 1);
679 if (i
->idx
!= IDX_NIL
)
680 i
->next_key
= bucket_at(h
, i
->idx
)->key
;
691 static unsigned hashmap_iterate_entry(HashmapBase
*h
, Iterator
*i
) {
697 #if ENABLE_DEBUG_HASHMAP
698 if (i
->idx
== IDX_FIRST
) {
699 i
->put_count
= h
->debug
.put_count
;
700 i
->rem_count
= h
->debug
.rem_count
;
702 /* While iterating, must not add any new entries */
703 assert(i
->put_count
== h
->debug
.put_count
);
704 /* ... or remove entries other than the current one */
705 assert(i
->rem_count
== h
->debug
.rem_count
||
706 (i
->rem_count
== h
->debug
.rem_count
- 1 &&
707 i
->prev_idx
== h
->debug
.last_rem_idx
));
708 /* Reset our removals counter */
709 i
->rem_count
= h
->debug
.rem_count
;
713 return h
->type
== HASHMAP_TYPE_ORDERED
? hashmap_iterate_in_insertion_order((OrderedHashmap
*) h
, i
)
714 : hashmap_iterate_in_internal_order(h
, i
);
717 bool _hashmap_iterate(HashmapBase
*h
, Iterator
*i
, void **value
, const void **key
) {
718 struct hashmap_base_entry
*e
;
722 idx
= hashmap_iterate_entry(h
, i
);
723 if (idx
== IDX_NIL
) {
732 e
= bucket_at(h
, idx
);
733 data
= entry_value(h
, e
);
742 #define HASHMAP_FOREACH_IDX(idx, h, i) \
743 for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
745 (idx) = hashmap_iterate_entry((h), &(i)))
747 IteratedCache
* _hashmap_iterated_cache_new(HashmapBase
*h
) {
748 IteratedCache
*cache
;
756 cache
= new0(IteratedCache
, 1);
766 static void reset_direct_storage(HashmapBase
*h
) {
767 const struct hashmap_type_info
*hi
= &hashmap_type_info
[h
->type
];
770 assert(!h
->has_indirect
);
772 p
= mempset(h
->direct
.storage
, 0, hi
->entry_size
* hi
->n_direct_buckets
);
773 memset(p
, DIB_RAW_INIT
, sizeof(dib_raw_t
) * hi
->n_direct_buckets
);
776 static void shared_hash_key_initialize(void) {
777 random_bytes(shared_hash_key
, sizeof(shared_hash_key
));
780 static struct HashmapBase
* hashmap_base_new(const struct hash_ops
*hash_ops
, enum HashmapType type HASHMAP_DEBUG_PARAMS
) {
782 const struct hashmap_type_info
*hi
= &hashmap_type_info
[type
];
784 bool use_pool
= mempool_enabled
&& mempool_enabled(); /* mempool_enabled is a weak symbol */
786 h
= use_pool
? mempool_alloc0_tile(hi
->mempool
) : malloc0(hi
->head_size
);
791 h
->from_pool
= use_pool
;
792 h
->hash_ops
= hash_ops
?: &trivial_hash_ops
;
794 if (type
== HASHMAP_TYPE_ORDERED
) {
795 OrderedHashmap
*lh
= (OrderedHashmap
*)h
;
796 lh
->iterate_list_head
= lh
->iterate_list_tail
= IDX_NIL
;
799 reset_direct_storage(h
);
801 static pthread_once_t once
= PTHREAD_ONCE_INIT
;
802 assert_se(pthread_once(&once
, shared_hash_key_initialize
) == 0);
804 #if ENABLE_DEBUG_HASHMAP
805 h
->debug
.func
= func
;
806 h
->debug
.file
= file
;
807 h
->debug
.line
= line
;
808 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex
) == 0);
809 LIST_PREPEND(debug_list
, hashmap_debug_list
, &h
->debug
);
810 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex
) == 0);
816 Hashmap
*_hashmap_new(const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
817 return (Hashmap
*) hashmap_base_new(hash_ops
, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS
);
820 OrderedHashmap
*_ordered_hashmap_new(const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
821 return (OrderedHashmap
*) hashmap_base_new(hash_ops
, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS
);
824 Set
*_set_new(const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
825 return (Set
*) hashmap_base_new(hash_ops
, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS
);
828 static int hashmap_base_ensure_allocated(HashmapBase
**h
, const struct hash_ops
*hash_ops
,
829 enum HashmapType type HASHMAP_DEBUG_PARAMS
) {
837 q
= hashmap_base_new(hash_ops
, type HASHMAP_DEBUG_PASS_ARGS
);
845 int _hashmap_ensure_allocated(Hashmap
**h
, const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
846 return hashmap_base_ensure_allocated((HashmapBase
**)h
, hash_ops
, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS
);
849 int _ordered_hashmap_ensure_allocated(OrderedHashmap
**h
, const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
850 return hashmap_base_ensure_allocated((HashmapBase
**)h
, hash_ops
, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS
);
853 int _set_ensure_allocated(Set
**s
, const struct hash_ops
*hash_ops HASHMAP_DEBUG_PARAMS
) {
854 return hashmap_base_ensure_allocated((HashmapBase
**)s
, hash_ops
, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS
);
857 int _hashmap_ensure_put(Hashmap
**h
, const struct hash_ops
*hash_ops
, const void *key
, void *value HASHMAP_DEBUG_PARAMS
) {
860 r
= _hashmap_ensure_allocated(h
, hash_ops HASHMAP_DEBUG_PASS_ARGS
);
864 return hashmap_put(*h
, key
, value
);
867 int _ordered_hashmap_ensure_put(OrderedHashmap
**h
, const struct hash_ops
*hash_ops
, const void *key
, void *value HASHMAP_DEBUG_PARAMS
) {
870 r
= _ordered_hashmap_ensure_allocated(h
, hash_ops HASHMAP_DEBUG_PASS_ARGS
);
874 return ordered_hashmap_put(*h
, key
, value
);
877 static void hashmap_free_no_clear(HashmapBase
*h
) {
878 assert(!h
->has_indirect
);
879 assert(h
->n_direct_entries
== 0);
881 #if ENABLE_DEBUG_HASHMAP
882 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex
) == 0);
883 LIST_REMOVE(debug_list
, hashmap_debug_list
, &h
->debug
);
884 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex
) == 0);
888 /* Ensure that the object didn't get migrated between threads. */
889 assert_se(is_main_thread());
890 mempool_free_tile(hashmap_type_info
[h
->type
].mempool
, h
);
895 HashmapBase
* _hashmap_free(HashmapBase
*h
, free_func_t default_free_key
, free_func_t default_free_value
) {
897 _hashmap_clear(h
, default_free_key
, default_free_value
);
898 hashmap_free_no_clear(h
);
904 void _hashmap_clear(HashmapBase
*h
, free_func_t default_free_key
, free_func_t default_free_value
) {
905 free_func_t free_key
, free_value
;
909 free_key
= h
->hash_ops
->free_key
?: default_free_key
;
910 free_value
= h
->hash_ops
->free_value
?: default_free_value
;
912 if (free_key
|| free_value
) {
914 /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
915 * hash table, and only then call the destructor functions. If these destructors then try to unregister
916 * themselves from our hash table a second time, the entry is already gone. */
918 while (_hashmap_size(h
) > 0) {
922 v
= _hashmap_first_key_and_value(h
, true, &k
);
932 if (h
->has_indirect
) {
933 free(h
->indirect
.storage
);
934 h
->has_indirect
= false;
937 h
->n_direct_entries
= 0;
938 reset_direct_storage(h
);
940 if (h
->type
== HASHMAP_TYPE_ORDERED
) {
941 OrderedHashmap
*lh
= (OrderedHashmap
*) h
;
942 lh
->iterate_list_head
= lh
->iterate_list_tail
= IDX_NIL
;
948 static int resize_buckets(HashmapBase
*h
, unsigned entries_add
);
951 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
952 * Performs Robin Hood swaps as it goes. The entry to put must be placed
953 * by the caller into swap slot IDX_PUT.
954 * If used for in-place resizing, may leave a displaced entry in swap slot
955 * IDX_PUT. Caller must rehash it next.
956 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
959 static bool hashmap_put_robin_hood(HashmapBase
*h
, unsigned idx
,
960 struct swap_entries
*swap
) {
961 dib_raw_t raw_dib
, *dibs
;
962 unsigned dib
, distance
;
964 #if ENABLE_DEBUG_HASHMAP
965 h
->debug
.put_count
++;
968 dibs
= dib_raw_ptr(h
);
970 for (distance
= 0; ; distance
++) {
972 if (IN_SET(raw_dib
, DIB_RAW_FREE
, DIB_RAW_REHASH
)) {
973 if (raw_dib
== DIB_RAW_REHASH
)
974 bucket_move_entry(h
, swap
, idx
, IDX_TMP
);
976 if (h
->has_indirect
&& h
->indirect
.idx_lowest_entry
> idx
)
977 h
->indirect
.idx_lowest_entry
= idx
;
979 bucket_set_dib(h
, idx
, distance
);
980 bucket_move_entry(h
, swap
, IDX_PUT
, idx
);
981 if (raw_dib
== DIB_RAW_REHASH
) {
982 bucket_move_entry(h
, swap
, IDX_TMP
, IDX_PUT
);
989 dib
= bucket_calculate_dib(h
, idx
, raw_dib
);
991 if (dib
< distance
) {
992 /* Found a wealthier entry. Go Robin Hood! */
993 bucket_set_dib(h
, idx
, distance
);
995 /* swap the entries */
996 bucket_move_entry(h
, swap
, idx
, IDX_TMP
);
997 bucket_move_entry(h
, swap
, IDX_PUT
, idx
);
998 bucket_move_entry(h
, swap
, IDX_TMP
, IDX_PUT
);
1003 idx
= next_idx(h
, idx
);
1008 * Puts an entry into a hashmap, boldly - no check whether key already exists.
1009 * The caller must place the entry (only its key and value, not link indexes)
1010 * in swap slot IDX_PUT.
1011 * Caller must ensure: the key does not exist yet in the hashmap.
1012 * that resize is not needed if !may_resize.
1013 * Returns: 1 if entry was put successfully.
1014 * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
1015 * Cannot return -ENOMEM if !may_resize.
1017 static int hashmap_base_put_boldly(HashmapBase
*h
, unsigned idx
,
1018 struct swap_entries
*swap
, bool may_resize
) {
1019 struct ordered_hashmap_entry
*new_entry
;
1022 assert(idx
< n_buckets(h
));
1024 new_entry
= bucket_at_swap(swap
, IDX_PUT
);
1027 r
= resize_buckets(h
, 1);
1031 idx
= bucket_hash(h
, new_entry
->p
.b
.key
);
1033 assert(n_entries(h
) < n_buckets(h
));
1035 if (h
->type
== HASHMAP_TYPE_ORDERED
) {
1036 OrderedHashmap
*lh
= (OrderedHashmap
*) h
;
1038 new_entry
->iterate_next
= IDX_NIL
;
1039 new_entry
->iterate_previous
= lh
->iterate_list_tail
;
1041 if (lh
->iterate_list_tail
!= IDX_NIL
) {
1042 struct ordered_hashmap_entry
*old_tail
;
1044 old_tail
= ordered_bucket_at(lh
, lh
->iterate_list_tail
);
1045 assert(old_tail
->iterate_next
== IDX_NIL
);
1046 old_tail
->iterate_next
= IDX_PUT
;
1049 lh
->iterate_list_tail
= IDX_PUT
;
1050 if (lh
->iterate_list_head
== IDX_NIL
)
1051 lh
->iterate_list_head
= IDX_PUT
;
1054 assert_se(hashmap_put_robin_hood(h
, idx
, swap
) == false);
1057 #if ENABLE_DEBUG_HASHMAP
1058 h
->debug
.max_entries
= MAX(h
->debug
.max_entries
, n_entries(h
));
1065 #define hashmap_put_boldly(h, idx, swap, may_resize) \
1066 hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1069 * Returns 0 if resize is not needed.
1070 * 1 if successfully resized.
1071 * -ENOMEM on allocation failure.
/*
 * Makes room in h for entries_add additional entries.
 *
 * Visible steps: compute new_n_entries and the bucket count required by the
 * load factor formula, guarding each computation against unsigned overflow;
 * realloc the storage (copying the direct storage over when h was not yet
 * indirect); fetch a hash key; move the DIB array to its new position while
 * tagging every used bucket with DIB_RAW_REHASH; finally re-insert each
 * tagged entry with hashmap_put_robin_hood() for as long as an insertion
 * displaces another entry.
 *
 * NOTE(review): this listing omits some statements (for example the
 * declarations of new_shift, new_storage and rehash_next, and the bodies of
 * the early-exit branches), so only the visible steps are described.
 */
1073 static int resize_buckets(HashmapBase
*h
, unsigned entries_add
) {
1074 struct swap_entries swap
;
1076 dib_raw_t
*old_dibs
, *new_dibs
;
1077 const struct hashmap_type_info
*hi
;
1078 unsigned idx
, optimal_idx
;
1079 unsigned old_n_buckets
, new_n_buckets
, n_rehashed
, new_n_entries
;
1085 hi
= &hashmap_type_info
[h
->type
];
1086 new_n_entries
= n_entries(h
) + entries_add
;
/* Unsigned wrap-around check: the sum must not be smaller than an addend. */
1089 if (_unlikely_(new_n_entries
< entries_add
))
1092 /* For direct storage we allow 100% load, because it's tiny. */
1093 if (!h
->has_indirect
&& new_n_entries
<= hi
->n_direct_buckets
)
1097 * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1098 * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1100 new_n_buckets
= new_n_entries
+ new_n_entries
/ (INV_KEEP_FREE
- 1);
/* Same wrap-around check for the bucket count. */
1102 if (_unlikely_(new_n_buckets
< new_n_entries
))
/* The byte size (entry plus DIB per bucket) must not exceed UINT_MAX. */
1105 if (_unlikely_(new_n_buckets
> UINT_MAX
/ (hi
->entry_size
+ sizeof(dib_raw_t
))))
1108 old_n_buckets
= n_buckets(h
);
/* Expected common case: the existing buckets already suffice. */
1110 if (_likely_(new_n_buckets
<= old_n_buckets
))
/* Round the byte size up to a power of two, at least twice the direct storage. */
1113 new_shift
= log2u_round_up(MAX(
1114 new_n_buckets
* (hi
->entry_size
+ sizeof(dib_raw_t
)),
1115 2 * sizeof(struct direct_storage
)));
1117 /* Realloc storage (buckets and DIB array). */
1118 new_storage
= realloc(h
->has_indirect
? h
->indirect
.storage
: NULL
,
1123 /* Must upgrade direct to indirect storage. */
1124 if (!h
->has_indirect
) {
1125 memcpy(new_storage
, h
->direct
.storage
,
1126 old_n_buckets
* (hi
->entry_size
+ sizeof(dib_raw_t
)));
1127 h
->indirect
.n_entries
= h
->n_direct_entries
;
1128 h
->indirect
.idx_lowest_entry
= 0;
1129 h
->n_direct_entries
= 0;
1132 /* Get a new hash key. If we've just upgraded to indirect storage,
1133 * allow reusing a previously generated key. It's still a different key
1134 * from the shared one that we used for direct storage. */
1135 get_hash_key(h
->indirect
.hash_key
, !h
->has_indirect
);
1137 h
->has_indirect
= true;
1138 h
->indirect
.storage
= new_storage
;
/* Bucket count = allocated bytes divided by the per-bucket footprint. */
1139 h
->indirect
.n_buckets
= (1U << new_shift
) /
1140 (hi
->entry_size
+ sizeof(dib_raw_t
));
/* The old DIB array starts right behind the old_n_buckets entries. */
1142 old_dibs
= (dib_raw_t
*)((uint8_t*) new_storage
+ hi
->entry_size
* old_n_buckets
);
1143 new_dibs
= dib_raw_ptr(h
);
1146 * Move the DIB array to the new place, replacing valid DIB values with
1147 * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1148 * Note: Overlap is not possible, because we have at least doubled the
1149 * number of buckets and dib_raw_t is smaller than any entry type.
1151 for (idx
= 0; idx
< old_n_buckets
; idx
++) {
1152 assert(old_dibs
[idx
] != DIB_RAW_REHASH
);
1153 new_dibs
[idx
] = old_dibs
[idx
] == DIB_RAW_FREE
? DIB_RAW_FREE
1157 /* Zero the area of newly added entries (including the old DIB area) */
1158 memzero(bucket_at(h
, old_n_buckets
),
1159 (n_buckets(h
) - old_n_buckets
) * hi
->entry_size
);
1161 /* The upper half of the new DIB array needs initialization */
1162 memset(&new_dibs
[old_n_buckets
], DIB_RAW_INIT
,
1163 (n_buckets(h
) - old_n_buckets
) * sizeof(dib_raw_t
));
1165 /* Rehash entries that need it */
1167 for (idx
= 0; idx
< old_n_buckets
; idx
++) {
1168 if (new_dibs
[idx
] != DIB_RAW_REHASH
)
1171 optimal_idx
= bucket_hash(h
, bucket_at(h
, idx
)->key
);
1174 * Not much to do if by luck the entry hashes to its current
1175 * location. Just set its DIB.
1177 if (optimal_idx
== idx
) {
/* Lift the entry out of its bucket into the IDX_PUT swap slot. */
1183 new_dibs
[idx
] = DIB_RAW_FREE
;
1184 bucket_move_entry(h
, &swap
, idx
, IDX_PUT
);
1185 /* bucket_move_entry does not clear the source */
1186 memzero(bucket_at(h
, idx
), hi
->entry_size
);
1190 * Find the new bucket for the current entry. This may make
1191 * another entry homeless and load it into IDX_PUT.
1193 rehash_next
= hashmap_put_robin_hood(h
, optimal_idx
, &swap
);
1196 /* Did the current entry displace another one? */
1198 optimal_idx
= bucket_hash(h
, bucket_at_swap(&swap
, IDX_PUT
)->p
.b
.key
);
1199 } while (rehash_next
);
/* Sanity check: the rehash counter must match the entry count. */
1202 assert_se(n_rehashed
== n_entries(h
));
1208 * Finds an entry with a matching key
1209 * Returns: index of the found entry, or IDX_NIL if not found.
/*
 * Probes linearly, starting at bucket idx, for an entry whose key compares
 * equal to key. Each step first tests the bucket for DIB_RAW_FREE, then
 * derives its DIB with bucket_calculate_dib(); the key comparison through
 * h->hash_ops->compare() is done only when that DIB equals the distance
 * already travelled. The scan advances with next_idx().
 *
 * NOTE(review): the statements executed on a free bucket and on a match are
 * not visible in this listing.
 */
1211 static unsigned base_bucket_scan(HashmapBase
*h
, unsigned idx
, const void *key
) {
1212 struct hashmap_base_entry
*e
;
1213 unsigned dib
, distance
;
1214 dib_raw_t
*dibs
= dib_raw_ptr(h
);
1216 assert(idx
< n_buckets(h
));
1218 for (distance
= 0; ; distance
++) {
1219 if (dibs
[idx
] == DIB_RAW_FREE
)
1222 dib
= bucket_calculate_dib(h
, idx
, dibs
[idx
]);
1226 if (dib
== distance
) {
1227 e
= bucket_at(h
, idx
);
1228 if (h
->hash_ops
->compare(e
->key
, key
) == 0)
1232 idx
= next_idx(h
, idx
);
/* Convenience front end: converts h with HASHMAP_BASE() and forwards to base_bucket_scan(). */
1235 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
/*
 * Adds the pair key/value to h. The key is hashed and looked up with
 * bucket_scan() first; if an entry already exists, its stored value is
 * compared with value. A new entry is then staged in the IDX_PUT slot of
 * swap and inserted at the hash bucket by hashmap_put_boldly() with
 * resizing allowed; that result is returned.
 *
 * NOTE(review): the return statements of the "already present" branch and
 * the initialization of the staged entry are not visible in this listing.
 */
1237 int hashmap_put(Hashmap
*h
, const void *key
, void *value
) {
1238 struct swap_entries swap
;
1239 struct plain_hashmap_entry
*e
;
1244 hash
= bucket_hash(h
, key
);
1245 idx
= bucket_scan(h
, hash
, key
);
1246 if (idx
!= IDX_NIL
) {
1247 e
= plain_bucket_at(h
, idx
);
1248 if (e
->value
== value
)
1253 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
;
1256 return hashmap_put_boldly(h
, hash
, &swap
, true);
/*
 * Adds key to the set s. The key is hashed and looked up with
 * bucket_scan(); a new entry is staged in the IDX_PUT slot of swap and
 * inserted by hashmap_put_boldly() with resizing allowed.
 *
 * NOTE(review): the handling of an already present key is not visible in
 * this listing.
 */
1259 int set_put(Set
*s
, const void *key
) {
1260 struct swap_entries swap
;
1261 struct hashmap_base_entry
*e
;
1266 hash
= bucket_hash(s
, key
);
1267 idx
= bucket_scan(s
, hash
, key
);
1271 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
.b
;
1273 return hashmap_put_boldly(s
, hash
, &swap
, true);
/*
 * Calls _set_ensure_allocated() on s with hash_ops, then adds key to *s
 * with set_put() and returns that result.
 * NOTE(review): the check of r after the allocation step is not shown.
 */
1276 int _set_ensure_put(Set
**s
, const struct hash_ops
*hash_ops
, const void *key HASHMAP_DEBUG_PARAMS
) {
1279 r
= _set_ensure_allocated(s
, hash_ops HASHMAP_DEBUG_PASS_ARGS
);
1283 return set_put(*s
, key
);
/*
 * Variant of _set_ensure_put() that can dispose of key: after the put, the
 * visible code calls hash_ops->free_key(key) when hash_ops and its
 * free_key callback are both non-NULL.
 * NOTE(review): the condition on r that guards the free is not shown;
 * presumably the key is freed only when it was not stored. Confirm.
 */
1286 int _set_ensure_consume(Set
**s
, const struct hash_ops
*hash_ops
, void *key HASHMAP_DEBUG_PARAMS
) {
1289 r
= _set_ensure_put(s
, hash_ops
, key HASHMAP_DEBUG_PASS_ARGS
);
1291 if (hash_ops
&& hash_ops
->free_key
)
1292 hash_ops
->free_key(key
);
/*
 * Stores value under key, replacing an existing entry if there is one.
 *
 * When bucket_scan() finds the key, the existing entry is fetched; in
 * ENABLE_DEBUG_HASHMAP builds a changed key pointer is accounted for as a
 * put plus a removal (put_count, rem_count, last_rem_idx), and the map is
 * marked dirty. The visible tail stages a new entry in swap and inserts it
 * with hashmap_put_boldly(), resizing allowed.
 *
 * NOTE(review): the assignments to the entry and the return of the
 * "found" branch are not visible in this listing.
 */
1300 int hashmap_replace(Hashmap
*h
, const void *key
, void *value
) {
1301 struct swap_entries swap
;
1302 struct plain_hashmap_entry
*e
;
1307 hash
= bucket_hash(h
, key
);
1308 idx
= bucket_scan(h
, hash
, key
);
1309 if (idx
!= IDX_NIL
) {
1310 e
= plain_bucket_at(h
, idx
);
1311 #if ENABLE_DEBUG_HASHMAP
1312 /* Although the key is equal, the key pointer may have changed,
1313 * and this would break our assumption for iterating. So count
1314 * this operation as incompatible with iteration. */
1315 if (e
->b
.key
!= key
) {
1316 h
->b
.debug
.put_count
++;
1317 h
->b
.debug
.rem_count
++;
1318 h
->b
.debug
.last_rem_idx
= idx
;
1323 hashmap_set_dirty(h
);
1328 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
;
1331 return hashmap_put_boldly(h
, hash
, &swap
, true);
/*
 * Operates on the existing entry for key: the key is looked up with
 * bucket_scan(), the plain entry at that index is fetched and the map is
 * marked dirty.
 * NOTE(review): the handling of a missing key and the assignment of value
 * are not visible in this listing.
 */
1334 int hashmap_update(Hashmap
*h
, const void *key
, void *value
) {
1335 struct plain_hashmap_entry
*e
;
1340 hash
= bucket_hash(h
, key
);
1341 idx
= bucket_scan(h
, hash
, key
);
1345 e
= plain_bucket_at(h
, idx
);
1347 hashmap_set_dirty(h
);
/*
 * Looks up key in h and returns entry_value() of the bucket found by
 * bucket_scan().
 * NOTE(review): the handling of a NULL map or a missing key is not shown.
 */
1352 void* _hashmap_get(HashmapBase
*h
, const void *key
) {
1353 struct hashmap_base_entry
*e
;
1359 hash
= bucket_hash(h
, key
);
1360 idx
= bucket_scan(h
, hash
, key
);
1364 e
= bucket_at(h
, idx
);
1365 return entry_value(h
, e
);
/*
 * Lookup that also reports the stored key: after locating the entry for
 * key, the key pointer held by the entry (e->b.key) is written to *key2.
 * NOTE(review): the handling of a missing key, any guard around the write
 * to *key2 and the return statement are not visible in this listing.
 */
1368 void* hashmap_get2(Hashmap
*h
, const void *key
, void **key2
) {
1369 struct plain_hashmap_entry
*e
;
1375 hash
= bucket_hash(h
, key
);
1376 idx
= bucket_scan(h
, hash
, key
);
1380 e
= plain_bucket_at(h
, idx
);
1382 *key2
= (void*) e
->b
.key
;
/*
 * Reports whether h holds an entry for key, i.e. whether bucket_scan()
 * yields something other than IDX_NIL.
 * NOTE(review): statements before the hash computation are not shown.
 */
1387 bool _hashmap_contains(HashmapBase
*h
, const void *key
) {
1393 hash
= bucket_hash(h
, key
);
1394 return bucket_scan(h
, hash
, key
) != IDX_NIL
;
/*
 * Removes the entry for key from h. The value of the entry is read with
 * entry_value() into data before remove_entry() is called.
 * NOTE(review): the handling of a missing key and the return statement are
 * not shown; presumably data is what gets returned.
 */
1397 void* _hashmap_remove(HashmapBase
*h
, const void *key
) {
1398 struct hashmap_base_entry
*e
;
1405 hash
= bucket_hash(h
, key
);
1406 idx
= bucket_scan(h
, hash
, key
);
1410 e
= bucket_at(h
, idx
);
1411 data
= entry_value(h
, e
);
1412 remove_entry(h
, idx
);
/*
 * Removes the entry for key and hands back the stored key pointer: the
 * entry key (e->b.key) is written to *rkey before remove_entry() is called.
 * NOTE(review): the body of the IDX_NIL branch, any guard around the write
 * to *rkey and the return statement are not visible in this listing.
 */
1417 void* hashmap_remove2(Hashmap
*h
, const void *key
, void **rkey
) {
1418 struct plain_hashmap_entry
*e
;
1428 hash
= bucket_hash(h
, key
);
1429 idx
= bucket_scan(h
, hash
, key
);
1430 if (idx
== IDX_NIL
) {
1436 e
= plain_bucket_at(h
, idx
);
1439 *rkey
= (void*) e
->b
.key
;
1441 remove_entry(h
, idx
);
/*
 * Replaces the entry stored under old_key by a new entry under new_key.
 *
 * Visible steps: locate old_key; test whether new_key is already present
 * (bucket_scan() != IDX_NIL); remove the old entry; stage the new entry in
 * swap and insert it with hashmap_put_boldly() with resizing disabled. The
 * insertion is asserted to return 1.
 *
 * NOTE(review): the early returns for "old_key missing" and "new_key
 * present" are not shown. Resizing is presumably unnecessary because the
 * removal just freed a bucket. Confirm.
 */
1446 int hashmap_remove_and_put(Hashmap
*h
, const void *old_key
, const void *new_key
, void *value
) {
1447 struct swap_entries swap
;
1448 struct plain_hashmap_entry
*e
;
1449 unsigned old_hash
, new_hash
, idx
;
1454 old_hash
= bucket_hash(h
, old_key
);
1455 idx
= bucket_scan(h
, old_hash
, old_key
);
1459 new_hash
= bucket_hash(h
, new_key
);
1460 if (bucket_scan(h
, new_hash
, new_key
) != IDX_NIL
)
1463 remove_entry(h
, idx
);
1465 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
;
1468 assert_se(hashmap_put_boldly(h
, new_hash
, &swap
, false) == 1);
/*
 * Set counterpart of hashmap_remove_and_put(): locates old_key, tests
 * whether new_key is already present, removes the old entry and inserts
 * the new key via hashmap_put_boldly() with resizing disabled, asserting a
 * result of 1.
 * NOTE(review): the early returns of the two checks are not shown.
 */
1473 int set_remove_and_put(Set
*s
, const void *old_key
, const void *new_key
) {
1474 struct swap_entries swap
;
1475 struct hashmap_base_entry
*e
;
1476 unsigned old_hash
, new_hash
, idx
;
1481 old_hash
= bucket_hash(s
, old_key
);
1482 idx
= bucket_scan(s
, old_hash
, old_key
);
1486 new_hash
= bucket_hash(s
, new_key
);
1487 if (bucket_scan(s
, new_hash
, new_key
) != IDX_NIL
)
1490 remove_entry(s
, idx
);
1492 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
.b
;
1494 assert_se(hashmap_put_boldly(s
, new_hash
, &swap
, false) == 1);
/*
 * Removes the entry for old_key and stores value under new_key, evicting
 * an existing new_key entry if necessary.
 *
 * Visible steps: locate old_key and remember the key pointer stored in its
 * bucket; locate new_key. If new_key exists in a different bucket it is
 * removed first; because that removal may shift entries backward, idx_old
 * is moved to prev_idx() when the remembered key pointer is no longer at
 * idx_old (and this is asserted afterwards). Then the old entry is removed
 * and the new one is inserted with resizing disabled, asserting a result
 * of 1.
 *
 * NOTE(review): the body of the "old_key missing" branch is not shown.
 */
1499 int hashmap_remove_and_replace(Hashmap
*h
, const void *old_key
, const void *new_key
, void *value
) {
1500 struct swap_entries swap
;
1501 struct plain_hashmap_entry
*e
;
1502 unsigned old_hash
, new_hash
, idx_old
, idx_new
;
1507 old_hash
= bucket_hash(h
, old_key
);
1508 idx_old
= bucket_scan(h
, old_hash
, old_key
);
1509 if (idx_old
== IDX_NIL
)
1512 old_key
= bucket_at(HASHMAP_BASE(h
), idx_old
)->key
;
1514 new_hash
= bucket_hash(h
, new_key
);
1515 idx_new
= bucket_scan(h
, new_hash
, new_key
);
1516 if (idx_new
!= IDX_NIL
)
1517 if (idx_old
!= idx_new
) {
1518 remove_entry(h
, idx_new
);
1519 /* Compensate for a possible backward shift. */
1520 if (old_key
!= bucket_at(HASHMAP_BASE(h
), idx_old
)->key
)
1521 idx_old
= prev_idx(HASHMAP_BASE(h
), idx_old
);
1522 assert(old_key
== bucket_at(HASHMAP_BASE(h
), idx_old
)->key
);
1525 remove_entry(h
, idx_old
);
1527 e
= &bucket_at_swap(&swap
, IDX_PUT
)->p
;
1530 assert_se(hashmap_put_boldly(h
, new_hash
, &swap
, false) == 1);
/*
 * Conditional removal: looks up key, compares the stored value (via
 * entry_value()) against value, and calls remove_entry() on the bucket.
 * NOTE(review): the bodies of the "key missing" and "value differs"
 * branches and the return statement are not visible in this listing.
 */
1535 void* _hashmap_remove_value(HashmapBase
*h
, const void *key
, void *value
) {
1536 struct hashmap_base_entry
*e
;
1542 hash
= bucket_hash(h
, key
);
1543 idx
= bucket_scan(h
, hash
, key
);
1547 e
= bucket_at(h
, idx
);
1548 if (entry_value(h
, e
) != value
)
1551 remove_entry(h
, idx
);
/*
 * Returns the bucket index produced by hashmap_iterate_entry() for an
 * iterator initialized to ITERATOR_FIRST.
 * NOTE(review): a NULL or empty map is handled by a branch whose body is
 * not shown.
 */
1556 static unsigned find_first_entry(HashmapBase
*h
) {
1557 Iterator i
= ITERATOR_FIRST
;
1559 if (!h
|| !n_entries(h
))
1562 return hashmap_iterate_entry(h
, &i
);
/*
 * Fetches key and value of the entry located by find_first_entry(); the
 * listing also shows a remove_entry() call on that bucket.
 * NOTE(review): the IDX_NIL branch, the condition guarding the removal
 * (presumably the remove parameter), the write to ret_key and the return
 * statement are not visible in this listing.
 */
1565 void* _hashmap_first_key_and_value(HashmapBase
*h
, bool remove
, void **ret_key
) {
1566 struct hashmap_base_entry
*e
;
1570 idx
= find_first_entry(h
);
1571 if (idx
== IDX_NIL
) {
1577 e
= bucket_at(h
, idx
);
1578 key
= (void*) e
->key
;
1579 data
= entry_value(h
, e
);
1582 remove_entry(h
, idx
);
/* Returns n_entries(h). NOTE(review): statements before the return are not shown. */
1590 unsigned _hashmap_size(HashmapBase
*h
) {
1594 return n_entries(h
);
/* Returns n_buckets(h). NOTE(review): statements before the return are not shown. */
1597 unsigned _hashmap_buckets(HashmapBase
*h
) {
1601 return n_buckets(h
);
/*
 * Copies the entries of other into h: every bucket of other is visited
 * with HASHMAP_FOREACH_IDX and its key/value pair is passed to
 * hashmap_put(). A negative result other than -EEXIST is singled out.
 * NOTE(review): the body of that error branch is not shown.
 */
1604 int _hashmap_merge(Hashmap
*h
, Hashmap
*other
) {
1610 HASHMAP_FOREACH_IDX(idx
, HASHMAP_BASE(other
), i
) {
1611 struct plain_hashmap_entry
*pe
= plain_bucket_at(other
, idx
);
1614 r
= hashmap_put(h
, pe
->b
.key
, pe
->value
);
1615 if (r
< 0 && r
!= -EEXIST
)
/*
 * Adds the keys of other to s: every bucket of other is visited with
 * HASHMAP_FOREACH_IDX and its key is passed to set_put().
 * NOTE(review): the handling of r is not visible in this listing.
 */
1622 int set_merge(Set
*s
, Set
*other
) {
1628 HASHMAP_FOREACH_IDX(idx
, HASHMAP_BASE(other
), i
) {
1629 struct set_entry
*se
= set_bucket_at(other
, idx
);
1632 r
= set_put(s
, se
->b
.key
);
/*
 * Requests room for entries_add further entries by calling
 * resize_buckets(). NOTE(review): the handling of r is not shown.
 */
1640 int _hashmap_reserve(HashmapBase
*h
, unsigned entries_add
) {
1645 r
= resize_buckets(h
, entries_add
);
1653 * The same as hashmap_merge(), but every new item from other is moved to h.
1654 * Keys already in h are skipped and stay in other.
1655 * Returns: 0 on success.
1656 * -ENOMEM on alloc failure, in which case no move has been done.
/*
 * Moves entries from other into h. Both maps must have the same type
 * (asserted). Buckets for all of the entries of other are reserved up
 * front with resize_buckets(); then, for each bucket of other, the key is
 * looked up in h, a copy of the entry is staged in swap (including the
 * value unless the type is HASHMAP_TYPE_SET), inserted with resizing
 * disabled (asserted to return 1) and removed from other.
 *
 * NOTE(review): the body of the "key already in h" branch and the
 * handling of r are not visible in this listing.
 */
1658 int _hashmap_move(HashmapBase
*h
, HashmapBase
*other
) {
1659 struct swap_entries swap
;
1660 struct hashmap_base_entry
*e
, *n
;
1670 assert(other
->type
== h
->type
);
1673 * This reserves buckets for the worst case, where none of other's
1674 * entries are yet present in h. This is preferable to risking
1675 * an allocation failure in the middle of the moving and having to
1676 * rollback or return a partial result.
1678 r
= resize_buckets(h
, n_entries(other
));
1682 HASHMAP_FOREACH_IDX(idx
, other
, i
) {
1685 e
= bucket_at(other
, idx
);
1686 h_hash
= bucket_hash(h
, e
->key
);
1687 if (bucket_scan(h
, h_hash
, e
->key
) != IDX_NIL
)
1690 n
= &bucket_at_swap(&swap
, IDX_PUT
)->p
.b
;
1692 if (h
->type
!= HASHMAP_TYPE_SET
)
1693 ((struct plain_hashmap_entry
*) n
)->value
=
1694 ((struct plain_hashmap_entry
*) e
)->value
;
1695 assert_se(hashmap_put_boldly(h
, h_hash
, &swap
, false) == 1);
1697 remove_entry(other
, idx
);
/*
 * Moves the single entry for key from other to h.
 *
 * Visible steps: test whether key is already present in h; assert that
 * both maps have the same type; locate key in other; stage a copy in swap
 * (copying the value unless the type is HASHMAP_TYPE_SET); insert it into
 * h with hashmap_put_boldly() with resizing allowed; remove the entry
 * from other.
 *
 * NOTE(review): the bodies of the "already in h" and "not in other"
 * branches and the handling of r are not visible in this listing.
 */
1703 int _hashmap_move_one(HashmapBase
*h
, HashmapBase
*other
, const void *key
) {
1704 struct swap_entries swap
;
1705 unsigned h_hash
, other_hash
, idx
;
1706 struct hashmap_base_entry
*e
, *n
;
1711 h_hash
= bucket_hash(h
, key
);
1712 if (bucket_scan(h
, h_hash
, key
) != IDX_NIL
)
1718 assert(other
->type
== h
->type
);
1720 other_hash
= bucket_hash(other
, key
);
1721 idx
= bucket_scan(other
, other_hash
, key
);
1725 e
= bucket_at(other
, idx
);
1727 n
= &bucket_at_swap(&swap
, IDX_PUT
)->p
.b
;
1729 if (h
->type
!= HASHMAP_TYPE_SET
)
1730 ((struct plain_hashmap_entry
*) n
)->value
=
1731 ((struct plain_hashmap_entry
*) e
)->value
;
1732 r
= hashmap_put_boldly(h
, h_hash
, &swap
, true);
1736 remove_entry(other
, idx
);
/*
 * Creates a copy of h: a new base object is made by hashmap_base_new()
 * with the hash_ops and type of h, then filled by hashmap_merge() for
 * HASHMAP_TYPE_PLAIN and HASHMAP_TYPE_ORDERED, or by set_merge() for
 * HASHMAP_TYPE_SET. Any other type hits assert_not_reached(). One path
 * returns the result of _hashmap_free(copy, NULL, NULL).
 *
 * NOTE(review): the conditions leading to that path (presumably a failed
 * merge) and the success return are not visible in this listing.
 */
1740 HashmapBase
* _hashmap_copy(HashmapBase
*h HASHMAP_DEBUG_PARAMS
) {
1746 copy
= hashmap_base_new(h
->hash_ops
, h
->type HASHMAP_DEBUG_PASS_ARGS
);
1751 case HASHMAP_TYPE_PLAIN
:
1752 case HASHMAP_TYPE_ORDERED
:
1753 r
= hashmap_merge((Hashmap
*)copy
, (Hashmap
*)h
);
1755 case HASHMAP_TYPE_SET
:
1756 r
= set_merge((Set
*)copy
, (Set
*)h
);
1759 assert_not_reached();
1763 return _hashmap_free(copy
, NULL
, NULL
);
/*
 * Builds an array of the values stored in h: an array of n_entries(h)+1
 * char pointers is allocated and filled, in HASHMAP_FOREACH_IDX order,
 * with entry_value() of each bucket. One path returns a zeroed
 * one-element array instead.
 *
 * NOTE(review): the condition for the one-element path (presumably a NULL
 * map), the allocation check, the terminator store and the return are not
 * visible in this listing.
 */
1768 char** _hashmap_get_strv(HashmapBase
*h
) {
1774 return new0(char*, 1);
1776 sv
= new(char*, n_entries(h
)+1);
1781 HASHMAP_FOREACH_IDX(idx
, h
, i
)
1782 sv
[n
++] = entry_value(h
, bucket_at(h
, idx
));
/*
 * Returns the value of the entry that follows key in the iteration chain:
 * the entry for key is located and the value of the bucket referenced by
 * its iterate_next index is returned.
 * NOTE(review): the handling of a missing key and of iterate_next ==
 * IDX_NIL is not visible in this listing.
 */
1788 void* ordered_hashmap_next(OrderedHashmap
*h
, const void *key
) {
1789 struct ordered_hashmap_entry
*e
;
1795 hash
= bucket_hash(h
, key
);
1796 idx
= bucket_scan(h
, hash
, key
);
1800 e
= ordered_bucket_at(h
, idx
);
1801 if (e
->iterate_next
== IDX_NIL
)
1803 return ordered_bucket_at(h
, e
->iterate_next
)->p
.value
;
/*
 * Adds value to s with set_put().
 * NOTE(review): everything after the put is not shown; judging by the
 * name, value is presumably freed when it was not stored. Confirm.
 */
1806 int set_consume(Set
*s
, void *value
) {
1812 r
= set_put(s
, value
);
/*
 * Stores copies of the strings k and v in *h, allocating the map first
 * with _hashmap_ensure_allocated(). The copies kdup and vdup are declared
 * _cleanup_free_ and passed to hashmap_put(). A result of -EEXIST is
 * special-cased when the value already stored under the key equals v
 * (streq_ptr()). At the end it is asserted that vdup is NULL or r > 0.
 *
 * NOTE(review): the duplication of k and v, the body of the -EEXIST
 * branch and the hand-over of ownership are not visible in this listing.
 */
1819 int _hashmap_put_strdup_full(Hashmap
**h
, const struct hash_ops
*hash_ops
, const char *k
, const char *v HASHMAP_DEBUG_PARAMS
) {
1822 r
= _hashmap_ensure_allocated(h
, hash_ops HASHMAP_DEBUG_PASS_ARGS
);
1826 _cleanup_free_
char *kdup
= NULL
, *vdup
= NULL
;
1838 r
= hashmap_put(*h
, kdup
, vdup
);
1840 if (r
== -EEXIST
&& streq_ptr(v
, hashmap_get(*h
, kdup
)))
1845 /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */
1846 assert(vdup
== NULL
|| r
> 0);
/*
 * Adds a copy of the string p (length limit n) to *s, allocating the set
 * first with _set_ensure_allocated(). When n is SIZE_MAX the set is first
 * queried with set_contains() for p itself. The copy c is finally handed
 * to set_consume(), whose result is returned.
 *
 * NOTE(review): the creation of c and the body of the set_contains()
 * branch are not visible in this listing.
 */
1853 int _set_put_strndup_full(Set
**s
, const struct hash_ops
*hash_ops
, const char *p
, size_t n HASHMAP_DEBUG_PARAMS
) {
1860 r
= _set_ensure_allocated(s
, hash_ops HASHMAP_DEBUG_PASS_ARGS
);
1864 if (n
== SIZE_MAX
) {
1865 if (set_contains(*s
, (char*) p
))
1874 return set_consume(*s
, c
);
/*
 * Adds every string of the vector l to *s by calling
 * _set_put_strndup_full() with n = SIZE_MAX for each element.
 * NOTE(review): the handling of r and the return value are not shown.
 */
1877 int _set_put_strdupv_full(Set
**s
, const struct hash_ops
*hash_ops
, char **l HASHMAP_DEBUG_PARAMS
) {
1882 STRV_FOREACH(i
, l
) {
1883 r
= _set_put_strndup_full(s
, hash_ops
, *i
, SIZE_MAX HASHMAP_DEBUG_PASS_ARGS
);
/*
 * Splits v (which must be non-NULL, see ASSERT_PTR) into words with
 * extract_first_word(), using separators and flags, and hands each word
 * to set_consume().
 * NOTE(review): the surrounding loop, the handling of r and the return
 * value are not visible in this listing.
 */
1893 int set_put_strsplit(Set
*s
, const char *v
, const char *separators
, ExtractFlags flags
) {
1894 const char *p
= ASSERT_PTR(v
);
1902 r
= extract_first_word(&p
, &word
, separators
, flags
);
1906 r
= set_consume(s
, word
);
1912 /* expand the cachemem if needed, return true if newly (re)activated. */
/*
 * Makes sure mem->ptr can hold size elements by calling GREEDY_REALLOC.
 * NOTE(review): the failure branch and the return values are not visible
 * in this listing.
 */
1913 static int cachemem_maintain(CacheMem
*mem
, size_t size
) {
1916 if (!GREEDY_REALLOC(mem
->ptr
, size
)) {
/*
 * Exposes the keys and/or values of cache->hashmap as flat arrays.
 *
 * Visible steps: size is taken from n_entries(); cachemem_maintain() is
 * called for the keys and the values cache (each cache can also be marked
 * inactive); if the hashmap is dirty, the active caches are flagged for
 * resynchronization and the dirty flag is cleared; when a resync is
 * needed the hashmap is walked with HASHMAP_FOREACH_IDX and keys/values
 * are stored at position i; finally the array pointers and size are
 * written to res_keys, res_values and res_n_entries.
 *
 * NOTE(review): the conditions selecting keys versus values (presumably
 * NULL checks on res_keys / res_values), the handling of r and the return
 * value are not visible in this listing.
 */
1929 int iterated_cache_get(IteratedCache
*cache
, const void ***res_keys
, const void ***res_values
, unsigned *res_n_entries
) {
1930 bool sync_keys
= false, sync_values
= false;
1935 assert(cache
->hashmap
);
1937 size
= n_entries(cache
->hashmap
);
1940 r
= cachemem_maintain(&cache
->keys
, size
);
1946 cache
->keys
.active
= false;
1949 r
= cachemem_maintain(&cache
->values
, size
);
1955 cache
->values
.active
= false;
1957 if (cache
->hashmap
->dirty
) {
1958 if (cache
->keys
.active
)
1960 if (cache
->values
.active
)
1963 cache
->hashmap
->dirty
= false;
1966 if (sync_keys
|| sync_values
) {
1971 HASHMAP_FOREACH_IDX(idx
, cache
->hashmap
, iter
) {
1972 struct hashmap_base_entry
*e
;
1974 e
= bucket_at(cache
->hashmap
, idx
);
1977 cache
->keys
.ptr
[i
] = e
->key
;
1979 cache
->values
.ptr
[i
] = entry_value(cache
->hashmap
, e
);
1985 *res_keys
= cache
->keys
.ptr
;
1987 *res_values
= cache
->values
.ptr
;
1989 *res_n_entries
= size
;
/*
 * Releases an IteratedCache: frees the keys and values arrays, then
 * returns the result of mfree(cache).
 * NOTE(review): statements before the first free() are not shown.
 */
1994 IteratedCache
* iterated_cache_free(IteratedCache
*cache
) {
1996 free(cache
->keys
.ptr
);
1997 free(cache
->values
.ptr
);
2000 return mfree(cache
);
/*
 * Joins the strings stored in s into one newly allocated string, written
 * to *ret.
 *
 * Visible steps: an empty set is handled by a separate branch; the
 * separator length comes from strlen_ptr(), and wrap_with_separator is
 * switched off when that length is 0; "first" starts out as
 * !wrap_with_separator, so with wrapping enabled a separator is emitted
 * before the first element as well. For each value the buffer is grown
 * with GREEDY_REALLOC (room for the value, a leading separator unless
 * first, a trailing separator when wrapping, and one extra byte), then
 * the separator and the value are appended with memcpy(). With wrapping a
 * final separator is appended after the loop.
 *
 * NOTE(review): the body of the empty-set branch, the updates of len and
 * first inside the loop, the terminating NUL store and the return value
 * are not visible in this listing.
 */
2003 int set_strjoin(Set
*s
, const char *separator
, bool wrap_with_separator
, char **ret
) {
2004 _cleanup_free_
char *str
= NULL
;
2005 size_t separator_len
, len
= 0;
2011 if (set_isempty(s
)) {
2016 separator_len
= strlen_ptr(separator
);
2018 if (separator_len
== 0)
2019 wrap_with_separator
= false;
2021 first
= !wrap_with_separator
;
2023 SET_FOREACH(value
, s
) {
2024 size_t l
= strlen_ptr(value
);
2029 if (!GREEDY_REALLOC(str
, len
+ l
+ (first
? 0 : separator_len
) + (wrap_with_separator
? separator_len
: 0) + 1))
2032 if (separator_len
> 0 && !first
) {
2033 memcpy(str
+ len
, separator
, separator_len
);
2034 len
+= separator_len
;
2037 memcpy(str
+ len
, value
, l
);
2042 if (wrap_with_separator
) {
2043 memcpy(str
+ len
, separator
, separator_len
);
2044 len
+= separator_len
;
2049 *ret
= TAKE_PTR(str
);
/*
 * Compares two sets for equal content. Visible checks, in order: both
 * sets empty; differing set_size(); every element visited must be
 * contained in b; identical hash_ops pointers allow skipping the reverse
 * direction; otherwise every element visited must be contained in a.
 * NOTE(review): the loops and the return statements of the individual
 * checks are not visible in this listing.
 */
2053 bool set_equal(Set
*a
, Set
*b
) {
2056 /* Checks whether each entry of 'a' is also in 'b' and vice versa, i.e. the two sets contain the same
2062 if (set_isempty(a
) && set_isempty(b
))
2065 if (set_size(a
) != set_size(b
)) /* Cheap check that hopefully catches a lot of inequality cases
2070 if (!set_contains(b
, p
))
2073 /* If we have the same hashops, then we don't need to check things backwards given we compared the
2074 * size and that all of a is in b. */
2075 if (a
->b
.hash_ops
== b
->b
.hash_ops
)
2079 if (!set_contains(a
, p
))
/*
 * Tests needle against each pattern stored in patterns using
 * fnmatch(p, needle, 0); a result of 0 counts as a match.
 * NOTE(review): the return statements are not visible in this listing.
 */
2085 static bool set_fnmatch_one(Set
*patterns
, const char *needle
) {
2090 /* Any failure of fnmatch() is treated as equivalent to FNM_NOMATCH, i.e. as non-matching pattern */
2092 SET_FOREACH(p
, patterns
)
2093 if (fnmatch(p
, needle
, 0) == 0)
/*
 * Filters needle through exclude and include pattern sets. The exclude
 * patterns are consulted first, then an empty include set is special-cased,
 * and finally the result of matching against the include patterns is
 * returned.
 * NOTE(review): the values returned by the first two branches are not
 * visible in this listing.
 */
2099 bool set_fnmatch(Set
*include_patterns
, Set
*exclude_patterns
, const char *needle
) {
2102 if (set_fnmatch_one(exclude_patterns
, needle
))
2105 if (set_isempty(include_patterns
))
2108 return set_fnmatch_one(include_patterns
, needle
);
/*
 * Comparator adapter for arrays of entry pointers: dereferences a and b
 * and returns compare() applied to the keys of the two entries.
 * NOTE(review): statements before the return are not shown.
 */
2111 static int hashmap_entry_compare(
2112 struct hashmap_base_entry
* const *a
,
2113 struct hashmap_base_entry
* const *b
,
2114 compare_func_t compare
) {
2120 return compare((*a
)->key
, (*b
)->key
);
2123 int _hashmap_dump_sorted(HashmapBase
*h
, void ***ret
, size_t *ret_n
) {
2124 _cleanup_free_
struct hashmap_base_entry
**entries
= NULL
;
2131 if (_hashmap_size(h
) == 0) {
2138 /* We append one more element than needed so that the resulting array can be used as a strv. We
2139 * don't count this entry in the returned size. */
2140 entries
= new(struct hashmap_base_entry
*, _hashmap_size(h
) + 1);
2144 HASHMAP_FOREACH_IDX(idx
, h
, iter
)
2145 entries
[n
++] = bucket_at(h
, idx
);
2147 assert(n
== _hashmap_size(h
));
2150 typesafe_qsort_r(entries
, n
, hashmap_entry_compare
, h
->hash_ops
->compare
);
2152 /* Reuse the array. */
2153 FOREACH_ARRAY(e
, entries
, n
)
2154 *e
= entry_value(h
, *e
);
2156 *ret
= (void**) TAKE_PTR(entries
);