]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/hashmap.c
strv: make iterator in STRV_FOREACH() declared in the loop
[thirdparty/systemd.git] / src / basic / hashmap.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <pthread.h>
5 #include <stdint.h>
6 #include <stdlib.h>
7
8 #include "alloc-util.h"
9 #include "fileio.h"
10 #include "hashmap.h"
11 #include "macro.h"
12 #include "memory-util.h"
13 #include "mempool.h"
14 #include "missing_syscall.h"
15 #include "process-util.h"
16 #include "random-util.h"
17 #include "set.h"
18 #include "siphash24.h"
19 #include "string-util.h"
20 #include "strv.h"
21
22 #if ENABLE_DEBUG_HASHMAP
23 #include "list.h"
24 #endif
25
26 /*
27 * Implementation of hashmaps.
28 * Addressing: open
29 * - uses less RAM compared to closed addressing (chaining), because
30 * our entries are small (especially in Sets, which tend to contain
31 * the majority of entries in systemd).
32 * Collision resolution: Robin Hood
33 * - tends to equalize displacement of entries from their optimal buckets.
34 * Probe sequence: linear
35 * - though theoretically worse than random probing/uniform hashing/double
36 * hashing, it is good for cache locality.
37 *
38 * References:
39 * Celis, P. 1986. Robin Hood Hashing.
40 * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada.
41 * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
42 * - The results are derived for random probing. Suggests deletion with
43 * tombstones and two mean-centered search methods. None of that works
44 * well for linear probing.
45 *
46 * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
47 * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
48 * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
49 * http://www.math.uu.se/~svante/papers/sj157.pdf
50 * - Applies to Robin Hood with linear probing. Contains remarks on
51 * the unsuitability of mean-centered search with linear probing.
52 *
53 * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
54 * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
55 * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
56 * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
57 * in a successful search), and Janson writes about displacement. C = d + 1.
58 *
59 * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
60 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
61 * - Explanation of backward shift deletion with pictures.
62 *
63 * Khuong, P. 2013. The Other Robin Hood Hashing.
64 * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
65 * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
66 */
67
68 /*
69 * XXX Ideas for improvement:
70 * For unordered hashmaps, randomize iteration order, similarly to Perl:
71 * http://blog.booking.com/hardening-perls-hash-function.html
72 */
73
/* Reciprocal of the fraction of buckets that is kept free:
 *   INV_KEEP_FREE = 1 / (1 - max_load_factor)
 * For a maximum load factor of 0.8 this yields 1 / (1 - 0.8) = 5, i.e. one
 * fifth of the buckets stays free. */
#define INV_KEEP_FREE 5U
77
/* The part shared by the entries of every hashmap/set flavour. */
struct hashmap_base_entry {
        const void *key;
};

/* Concrete entry layouts, one per hashmap/set flavour.
 * Every one of them has to begin with a hashmap_base_entry, so that an entry
 * pointer can always be handled as a pointer to the base entry. */

/* Entry of a plain hashmap: key plus value. */
struct plain_hashmap_entry {
        struct hashmap_base_entry b;
        void *value;
};

/* Entry of an ordered hashmap: a plain entry extended by two indexes that
 * link it to its neighbours in iteration order. */
struct ordered_hashmap_entry {
        struct plain_hashmap_entry p;
        unsigned iterate_next;
        unsigned iterate_previous;
};

/* Entry of a set: the key only. */
struct set_entry {
        struct hashmap_base_entry b;
};
99
100 /* In several functions it is advantageous to have the hash table extended
101 * virtually by a couple of additional buckets. We reserve special index values
102 * for these "swap" buckets. */
103 #define _IDX_SWAP_BEGIN (UINT_MAX - 3)
104 #define IDX_PUT (_IDX_SWAP_BEGIN + 0)
105 #define IDX_TMP (_IDX_SWAP_BEGIN + 1)
106 #define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
107
108 #define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
109 #define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
110
111 assert_cc(IDX_FIRST == _IDX_SWAP_END);
112 assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
113
114 /* Storage space for the "swap" buckets.
115 * All entry types can fit into an ordered_hashmap_entry. */
116 struct swap_entries {
117 struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
118 };
119
/* DIB = Distance from Initial Bucket. The raw form stored next to each bucket
 * is a single byte; the three highest byte values are reserved markers. */
typedef uint8_t dib_raw_t;
#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* real DIB is too large to be stored in a byte */
#define DIB_RAW_REHASH   ((dib_raw_t)0xfeU) /* entry still awaits rehashing during in-place resize */
#define DIB_RAW_FREE     ((dib_raw_t)0xffU) /* bucket is free */
#define DIB_RAW_INIT     ((char)DIB_RAW_FREE) /* fill byte for memset()ing a fresh DIB store */

/* Non-raw (unsigned) DIB value standing for a free bucket. */
#define DIB_FREE UINT_MAX
128
#if ENABLE_DEBUG_HASHMAP
/* Extra bookkeeping attached to every hashmap in debug builds. */
struct hashmap_debug_info {
        LIST_FIELDS(struct hashmap_debug_info, debug_list);
        unsigned max_entries;   /* high watermark of n_entries */

        /* allocation site of this hashmap */
        int line;
        const char *file;
        const char *func;

        /* counters used to detect modification while iterating */
        unsigned put_count;     /* number of puts into the hashmap */
        unsigned rem_count;     /* number of removals from the hashmap */
        unsigned last_rem_idx;  /* index of the most recent removal */
};

/* List of all existing hashmaps, meant to be inspected from gdb.
 * See sd_dump_hashmaps.py. */
static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
149
/* The hashmap flavours. The values index per-type tables, and
 * _HASHMAP_TYPE_MAX is the number of flavours. */
enum HashmapType {
        HASHMAP_TYPE_PLAIN,     /* key -> value */
        HASHMAP_TYPE_ORDERED,   /* key -> value, with linked iteration order */
        HASHMAP_TYPE_SET,       /* keys only */
        _HASHMAP_TYPE_MAX
};
156
157 struct _packed_ indirect_storage {
158 void *storage; /* where buckets and DIBs are stored */
159 uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
160
161 unsigned n_entries; /* number of stored entries */
162 unsigned n_buckets; /* number of buckets */
163
164 unsigned idx_lowest_entry; /* Index below which all buckets are free.
165 Makes "while(hashmap_steal_first())" loops
166 O(n) instead of O(n^2) for unordered hashmaps. */
167 uint8_t _pad[3]; /* padding for the whole HashmapBase */
168 /* The bitfields in HashmapBase complete the alignment of the whole thing. */
169 };
170
171 struct direct_storage {
172 /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
173 * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
174 * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
175 uint8_t storage[sizeof(struct indirect_storage)];
176 };
177
178 #define DIRECT_BUCKETS(entry_t) \
179 (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
180
181 /* We should be able to store at least one entry directly. */
182 assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
183
184 /* We have 3 bits for n_direct_entries. */
185 assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
186
187 /* Hashmaps with directly stored entries all use this shared hash key.
188 * It's no big deal if the key is guessed, because there can be only
189 * a handful of directly stored entries in a hashmap. When a hashmap
190 * outgrows direct storage, it gets its own key for indirect storage. */
191 static uint8_t shared_hash_key[HASH_KEY_SIZE];
192
193 /* Fields that all hashmap/set types must have */
194 struct HashmapBase {
195 const struct hash_ops *hash_ops; /* hash and compare ops to use */
196
197 union _packed_ {
198 struct indirect_storage indirect; /* if has_indirect */
199 struct direct_storage direct; /* if !has_indirect */
200 };
201
202 enum HashmapType type:2; /* HASHMAP_TYPE_* */
203 bool has_indirect:1; /* whether indirect storage is used */
204 unsigned n_direct_entries:3; /* Number of entries in direct storage.
205 * Only valid if !has_indirect. */
206 bool from_pool:1; /* whether was allocated from mempool */
207 bool dirty:1; /* whether dirtied since last iterated_cache_get() */
208 bool cached:1; /* whether this hashmap is being cached */
209
210 #if ENABLE_DEBUG_HASHMAP
211 struct hashmap_debug_info debug;
212 #endif
213 };
214
215 /* Specific hash types
216 * HashmapBase must be at the beginning of each hashmap struct. */
217
218 struct Hashmap {
219 struct HashmapBase b;
220 };
221
222 struct OrderedHashmap {
223 struct HashmapBase b;
224 unsigned iterate_list_head, iterate_list_tail;
225 };
226
227 struct Set {
228 struct HashmapBase b;
229 };
230
231 typedef struct CacheMem {
232 const void **ptr;
233 size_t n_populated;
234 bool active:1;
235 } CacheMem;
236
237 struct IteratedCache {
238 HashmapBase *hashmap;
239 CacheMem keys, values;
240 };
241
242 DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
243 DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
244 /* No need for a separate Set pool */
245 assert_cc(sizeof(Hashmap) == sizeof(Set));
246
/* Static, per-flavour properties of a hashmap type. */
struct hashmap_type_info {
        size_t head_size;           /* sizeof() of the hashmap head struct */
        size_t entry_size;          /* sizeof() of one entry */
        struct mempool *mempool;    /* pool the head is allocated from */
        unsigned n_direct_buckets;  /* number of buckets that fit into direct storage */
};
253
254 static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
255 [HASHMAP_TYPE_PLAIN] = {
256 .head_size = sizeof(Hashmap),
257 .entry_size = sizeof(struct plain_hashmap_entry),
258 .mempool = &hashmap_pool,
259 .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
260 },
261 [HASHMAP_TYPE_ORDERED] = {
262 .head_size = sizeof(OrderedHashmap),
263 .entry_size = sizeof(struct ordered_hashmap_entry),
264 .mempool = &ordered_hashmap_pool,
265 .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
266 },
267 [HASHMAP_TYPE_SET] = {
268 .head_size = sizeof(Set),
269 .entry_size = sizeof(struct set_entry),
270 .mempool = &hashmap_pool,
271 .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
272 },
273 };
274
#if VALGRIND
/* Destructor that drops both hashmap mempools, to be nice to valgrind.
 *
 * The pools are only allocated by the main thread, but their memory may have
 * been handed to other threads. They are therefore dropped only when this is
 * the main thread and no other thread is alive. */
_destructor_ static void cleanup_pools(void) {
        _cleanup_free_ char *n_threads = NULL;

        /* Hand-rolled is_main_thread(): the regular one caches its result in
         * C11 TLS, and valgrind apparently dislikes malloc() (which C11 TLS
         * uses internally) being called from a GCC destructor. */
        if (getpid() != gettid())
                return;

        /* Bail out if the thread count cannot be read ... */
        if (get_proc_field("/proc/self/status", "Threads", WHITESPACE, &n_threads) < 0)
                return;

        /* ... or if any thread besides this one is still around. */
        if (!streq(n_threads, "1"))
                return;

        mempool_drop(&hashmap_pool);
        mempool_drop(&ordered_hashmap_pool);
}
#endif
300
301 static unsigned n_buckets(HashmapBase *h) {
302 return h->has_indirect ? h->indirect.n_buckets
303 : hashmap_type_info[h->type].n_direct_buckets;
304 }
305
306 static unsigned n_entries(HashmapBase *h) {
307 return h->has_indirect ? h->indirect.n_entries
308 : h->n_direct_entries;
309 }
310
311 static void n_entries_inc(HashmapBase *h) {
312 if (h->has_indirect)
313 h->indirect.n_entries++;
314 else
315 h->n_direct_entries++;
316 }
317
318 static void n_entries_dec(HashmapBase *h) {
319 if (h->has_indirect)
320 h->indirect.n_entries--;
321 else
322 h->n_direct_entries--;
323 }
324
325 static void* storage_ptr(HashmapBase *h) {
326 return h->has_indirect ? h->indirect.storage
327 : h->direct.storage;
328 }
329
330 static uint8_t* hash_key(HashmapBase *h) {
331 return h->has_indirect ? h->indirect.hash_key
332 : shared_hash_key;
333 }
334
335 static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
336 struct siphash state;
337 uint64_t hash;
338
339 siphash24_init(&state, hash_key(h));
340
341 h->hash_ops->hash(p, &state);
342
343 hash = siphash24_finalize(&state);
344
345 return (unsigned) (hash % n_buckets(h));
346 }
347 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
348
349 static void base_set_dirty(HashmapBase *h) {
350 h->dirty = true;
351 }
352 #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
353
354 static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
355 static uint8_t current[HASH_KEY_SIZE];
356 static bool current_initialized = false;
357
358 /* Returns a hash function key to use. In order to keep things
359 * fast we will not generate a new key each time we allocate a
360 * new hash table. Instead, we'll just reuse the most recently
361 * generated one, except if we never generated one or when we
362 * are rehashing an entire hash table because we reached a
363 * fill level */
364
365 if (!current_initialized || !reuse_is_ok) {
366 random_bytes(current, sizeof(current));
367 current_initialized = true;
368 }
369
370 memcpy(hash_key, current, sizeof(current));
371 }
372
373 static struct hashmap_base_entry* bucket_at(HashmapBase *h, unsigned idx) {
374 return (struct hashmap_base_entry*)
375 ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
376 }
377
378 static struct plain_hashmap_entry* plain_bucket_at(Hashmap *h, unsigned idx) {
379 return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
380 }
381
382 static struct ordered_hashmap_entry* ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
383 return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
384 }
385
386 static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
387 return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
388 }
389
390 static struct ordered_hashmap_entry* bucket_at_swap(struct swap_entries *swap, unsigned idx) {
391 return &swap->e[idx - _IDX_SWAP_BEGIN];
392 }
393
394 /* Returns a pointer to the bucket at index idx.
395 * Understands real indexes and swap indexes, hence "_virtual". */
396 static struct hashmap_base_entry* bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
397 unsigned idx) {
398 if (idx < _IDX_SWAP_BEGIN)
399 return bucket_at(h, idx);
400
401 if (idx < _IDX_SWAP_END)
402 return &bucket_at_swap(swap, idx)->p.b;
403
404 assert_not_reached();
405 }
406
407 static dib_raw_t* dib_raw_ptr(HashmapBase *h) {
408 return (dib_raw_t*)
409 ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
410 }
411
412 static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
413 return idx >= from ? idx - from
414 : n_buckets(h) + idx - from;
415 }
416
417 static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
418 unsigned initial_bucket;
419
420 if (raw_dib == DIB_RAW_FREE)
421 return DIB_FREE;
422
423 if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
424 return raw_dib;
425
426 /*
427 * Having an overflow DIB value is very unlikely. The hash function
428 * would have to be bad. For example, in a table of size 2^24 filled
429 * to load factor 0.9 the maximum observed DIB is only about 60.
430 * In theory (assuming I used Maxima correctly), for an infinite size
431 * hash table with load factor 0.8 the probability of a given entry
432 * having DIB > 40 is 1.9e-8.
433 * This returns the correct DIB value by recomputing the hash value in
434 * the unlikely case. XXX Hitting this case could be a hint to rehash.
435 */
436 initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
437 return bucket_distance(h, idx, initial_bucket);
438 }
439
440 static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
441 dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
442 }
443
444 static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
445 dib_raw_t *dibs;
446
447 dibs = dib_raw_ptr(h);
448
449 for ( ; idx < n_buckets(h); idx++)
450 if (dibs[idx] != DIB_RAW_FREE)
451 return idx;
452
453 return IDX_NIL;
454 }
455
456 static void bucket_mark_free(HashmapBase *h, unsigned idx) {
457 memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
458 bucket_set_dib(h, idx, DIB_FREE);
459 }
460
461 static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
462 unsigned from, unsigned to) {
463 struct hashmap_base_entry *e_from, *e_to;
464
465 assert(from != to);
466
467 e_from = bucket_at_virtual(h, swap, from);
468 e_to = bucket_at_virtual(h, swap, to);
469
470 memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
471
472 if (h->type == HASHMAP_TYPE_ORDERED) {
473 OrderedHashmap *lh = (OrderedHashmap*) h;
474 struct ordered_hashmap_entry *le, *le_to;
475
476 le_to = (struct ordered_hashmap_entry*) e_to;
477
478 if (le_to->iterate_next != IDX_NIL) {
479 le = (struct ordered_hashmap_entry*)
480 bucket_at_virtual(h, swap, le_to->iterate_next);
481 le->iterate_previous = to;
482 }
483
484 if (le_to->iterate_previous != IDX_NIL) {
485 le = (struct ordered_hashmap_entry*)
486 bucket_at_virtual(h, swap, le_to->iterate_previous);
487 le->iterate_next = to;
488 }
489
490 if (lh->iterate_list_head == from)
491 lh->iterate_list_head = to;
492 if (lh->iterate_list_tail == from)
493 lh->iterate_list_tail = to;
494 }
495 }
496
497 static unsigned next_idx(HashmapBase *h, unsigned idx) {
498 return (idx + 1U) % n_buckets(h);
499 }
500
501 static unsigned prev_idx(HashmapBase *h, unsigned idx) {
502 return (n_buckets(h) + idx - 1U) % n_buckets(h);
503 }
504
505 static void* entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
506 switch (h->type) {
507
508 case HASHMAP_TYPE_PLAIN:
509 case HASHMAP_TYPE_ORDERED:
510 return ((struct plain_hashmap_entry*)e)->value;
511
512 case HASHMAP_TYPE_SET:
513 return (void*) e->key;
514
515 default:
516 assert_not_reached();
517 }
518 }
519
520 static void base_remove_entry(HashmapBase *h, unsigned idx) {
521 unsigned left, right, prev, dib;
522 dib_raw_t raw_dib, *dibs;
523
524 dibs = dib_raw_ptr(h);
525 assert(dibs[idx] != DIB_RAW_FREE);
526
527 #if ENABLE_DEBUG_HASHMAP
528 h->debug.rem_count++;
529 h->debug.last_rem_idx = idx;
530 #endif
531
532 left = idx;
533 /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
534 for (right = next_idx(h, left); ; right = next_idx(h, right)) {
535 raw_dib = dibs[right];
536 if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
537 break;
538
539 /* The buckets are not supposed to be all occupied and with DIB > 0.
540 * That would mean we could make everyone better off by shifting them
541 * backward. This scenario is impossible. */
542 assert(left != right);
543 }
544
545 if (h->type == HASHMAP_TYPE_ORDERED) {
546 OrderedHashmap *lh = (OrderedHashmap*) h;
547 struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
548
549 if (le->iterate_next != IDX_NIL)
550 ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
551 else
552 lh->iterate_list_tail = le->iterate_previous;
553
554 if (le->iterate_previous != IDX_NIL)
555 ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
556 else
557 lh->iterate_list_head = le->iterate_next;
558 }
559
560 /* Now shift all buckets in the interval (left, right) one step backwards */
561 for (prev = left, left = next_idx(h, left); left != right;
562 prev = left, left = next_idx(h, left)) {
563 dib = bucket_calculate_dib(h, left, dibs[left]);
564 assert(dib != 0);
565 bucket_move_entry(h, NULL, left, prev);
566 bucket_set_dib(h, prev, dib - 1);
567 }
568
569 bucket_mark_free(h, prev);
570 n_entries_dec(h);
571 base_set_dirty(h);
572 }
573 #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
574
575 static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
576 struct ordered_hashmap_entry *e;
577 unsigned idx;
578
579 assert(h);
580 assert(i);
581
582 if (i->idx == IDX_NIL)
583 goto at_end;
584
585 if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
586 goto at_end;
587
588 if (i->idx == IDX_FIRST) {
589 idx = h->iterate_list_head;
590 e = ordered_bucket_at(h, idx);
591 } else {
592 idx = i->idx;
593 e = ordered_bucket_at(h, idx);
594 /*
595 * We allow removing the current entry while iterating, but removal may cause
596 * a backward shift. The next entry may thus move one bucket to the left.
597 * To detect when it happens, we remember the key pointer of the entry we were
598 * going to iterate next. If it does not match, there was a backward shift.
599 */
600 if (e->p.b.key != i->next_key) {
601 idx = prev_idx(HASHMAP_BASE(h), idx);
602 e = ordered_bucket_at(h, idx);
603 }
604 assert(e->p.b.key == i->next_key);
605 }
606
607 #if ENABLE_DEBUG_HASHMAP
608 i->prev_idx = idx;
609 #endif
610
611 if (e->iterate_next != IDX_NIL) {
612 struct ordered_hashmap_entry *n;
613 i->idx = e->iterate_next;
614 n = ordered_bucket_at(h, i->idx);
615 i->next_key = n->p.b.key;
616 } else
617 i->idx = IDX_NIL;
618
619 return idx;
620
621 at_end:
622 i->idx = IDX_NIL;
623 return IDX_NIL;
624 }
625
626 static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
627 unsigned idx;
628
629 assert(h);
630 assert(i);
631
632 if (i->idx == IDX_NIL)
633 goto at_end;
634
635 if (i->idx == IDX_FIRST) {
636 /* fast forward to the first occupied bucket */
637 if (h->has_indirect) {
638 i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
639 h->indirect.idx_lowest_entry = i->idx;
640 } else
641 i->idx = skip_free_buckets(h, 0);
642
643 if (i->idx == IDX_NIL)
644 goto at_end;
645 } else {
646 struct hashmap_base_entry *e;
647
648 assert(i->idx > 0);
649
650 e = bucket_at(h, i->idx);
651 /*
652 * We allow removing the current entry while iterating, but removal may cause
653 * a backward shift. The next entry may thus move one bucket to the left.
654 * To detect when it happens, we remember the key pointer of the entry we were
655 * going to iterate next. If it does not match, there was a backward shift.
656 */
657 if (e->key != i->next_key)
658 e = bucket_at(h, --i->idx);
659
660 assert(e->key == i->next_key);
661 }
662
663 idx = i->idx;
664 #if ENABLE_DEBUG_HASHMAP
665 i->prev_idx = idx;
666 #endif
667
668 i->idx = skip_free_buckets(h, i->idx + 1);
669 if (i->idx != IDX_NIL)
670 i->next_key = bucket_at(h, i->idx)->key;
671 else
672 i->idx = IDX_NIL;
673
674 return idx;
675
676 at_end:
677 i->idx = IDX_NIL;
678 return IDX_NIL;
679 }
680
681 static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
682 if (!h) {
683 i->idx = IDX_NIL;
684 return IDX_NIL;
685 }
686
687 #if ENABLE_DEBUG_HASHMAP
688 if (i->idx == IDX_FIRST) {
689 i->put_count = h->debug.put_count;
690 i->rem_count = h->debug.rem_count;
691 } else {
692 /* While iterating, must not add any new entries */
693 assert(i->put_count == h->debug.put_count);
694 /* ... or remove entries other than the current one */
695 assert(i->rem_count == h->debug.rem_count ||
696 (i->rem_count == h->debug.rem_count - 1 &&
697 i->prev_idx == h->debug.last_rem_idx));
698 /* Reset our removals counter */
699 i->rem_count = h->debug.rem_count;
700 }
701 #endif
702
703 return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
704 : hashmap_iterate_in_internal_order(h, i);
705 }
706
707 bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
708 struct hashmap_base_entry *e;
709 void *data;
710 unsigned idx;
711
712 idx = hashmap_iterate_entry(h, i);
713 if (idx == IDX_NIL) {
714 if (value)
715 *value = NULL;
716 if (key)
717 *key = NULL;
718
719 return false;
720 }
721
722 e = bucket_at(h, idx);
723 data = entry_value(h, e);
724 if (value)
725 *value = data;
726 if (key)
727 *key = e->key;
728
729 return true;
730 }
731
/* Loops over all entries of h, assigning the bucket index of each entry to
 * idx in turn. i is an Iterator lvalue that the macro resets to
 * ITERATOR_FIRST before the first step. The loop ends when
 * hashmap_iterate_entry() returns IDX_NIL.
 * Every use of a macro parameter is parenthesized, including idx in the loop
 * condition. */
#define HASHMAP_FOREACH_IDX(idx, h, i) \
        for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
             (idx) != IDX_NIL; \
             (idx) = hashmap_iterate_entry((h), &(i)))
736
737 IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h) {
738 IteratedCache *cache;
739
740 assert(h);
741 assert(!h->cached);
742
743 if (h->cached)
744 return NULL;
745
746 cache = new0(IteratedCache, 1);
747 if (!cache)
748 return NULL;
749
750 cache->hashmap = h;
751 h->cached = true;
752
753 return cache;
754 }
755
756 static void reset_direct_storage(HashmapBase *h) {
757 const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
758 void *p;
759
760 assert(!h->has_indirect);
761
762 p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
763 memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
764 }
765
766 static void shared_hash_key_initialize(void) {
767 random_bytes(shared_hash_key, sizeof(shared_hash_key));
768 }
769
770 static struct HashmapBase* hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
771 HashmapBase *h;
772 const struct hashmap_type_info *hi = &hashmap_type_info[type];
773 bool up;
774
775 up = mempool_enabled();
776
777 h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
778 if (!h)
779 return NULL;
780
781 h->type = type;
782 h->from_pool = up;
783 h->hash_ops = hash_ops ?: &trivial_hash_ops;
784
785 if (type == HASHMAP_TYPE_ORDERED) {
786 OrderedHashmap *lh = (OrderedHashmap*)h;
787 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
788 }
789
790 reset_direct_storage(h);
791
792 static pthread_once_t once = PTHREAD_ONCE_INIT;
793 assert_se(pthread_once(&once, shared_hash_key_initialize) == 0);
794
795 #if ENABLE_DEBUG_HASHMAP
796 h->debug.func = func;
797 h->debug.file = file;
798 h->debug.line = line;
799 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
800 LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
801 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
802 #endif
803
804 return h;
805 }
806
807 Hashmap *_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
808 return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
809 }
810
811 OrderedHashmap *_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
812 return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
813 }
814
815 Set *_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
816 return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
817 }
818
819 static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
820 enum HashmapType type HASHMAP_DEBUG_PARAMS) {
821 HashmapBase *q;
822
823 assert(h);
824
825 if (*h)
826 return 0;
827
828 q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
829 if (!q)
830 return -ENOMEM;
831
832 *h = q;
833 return 1;
834 }
835
836 int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
837 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
838 }
839
840 int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
841 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
842 }
843
844 int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
845 return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
846 }
847
848 int _hashmap_ensure_put(Hashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) {
849 int r;
850
851 r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS);
852 if (r < 0)
853 return r;
854
855 return hashmap_put(*h, key, value);
856 }
857
858 int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) {
859 int r;
860
861 r = _ordered_hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS);
862 if (r < 0)
863 return r;
864
865 return ordered_hashmap_put(*h, key, value);
866 }
867
868 static void hashmap_free_no_clear(HashmapBase *h) {
869 assert(!h->has_indirect);
870 assert(h->n_direct_entries == 0);
871
872 #if ENABLE_DEBUG_HASHMAP
873 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
874 LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
875 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
876 #endif
877
878 if (h->from_pool) {
879 /* Ensure that the object didn't get migrated between threads. */
880 assert_se(is_main_thread());
881 mempool_free_tile(hashmap_type_info[h->type].mempool, h);
882 } else
883 free(h);
884 }
885
886 HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
887 if (h) {
888 _hashmap_clear(h, default_free_key, default_free_value);
889 hashmap_free_no_clear(h);
890 }
891
892 return NULL;
893 }
894
895 void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
896 free_func_t free_key, free_value;
897 if (!h)
898 return;
899
900 free_key = h->hash_ops->free_key ?: default_free_key;
901 free_value = h->hash_ops->free_value ?: default_free_value;
902
903 if (free_key || free_value) {
904
905 /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
906 * hash table, and only then call the destructor functions. If these destructors then try to unregister
907 * themselves from our hash table a second time, the entry is already gone. */
908
909 while (_hashmap_size(h) > 0) {
910 void *k = NULL;
911 void *v;
912
913 v = _hashmap_first_key_and_value(h, true, &k);
914
915 if (free_key)
916 free_key(k);
917
918 if (free_value)
919 free_value(v);
920 }
921 }
922
923 if (h->has_indirect) {
924 free(h->indirect.storage);
925 h->has_indirect = false;
926 }
927
928 h->n_direct_entries = 0;
929 reset_direct_storage(h);
930
931 if (h->type == HASHMAP_TYPE_ORDERED) {
932 OrderedHashmap *lh = (OrderedHashmap*) h;
933 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
934 }
935
936 base_set_dirty(h);
937 }
938
939 static int resize_buckets(HashmapBase *h, unsigned entries_add);
940
941 /*
942 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
943 * Performs Robin Hood swaps as it goes. The entry to put must be placed
944 * by the caller into swap slot IDX_PUT.
945 * If used for in-place resizing, may leave a displaced entry in swap slot
946 * IDX_PUT. Caller must rehash it next.
947 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
948 * false otherwise.
949 */
950 static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
951 struct swap_entries *swap) {
952 dib_raw_t raw_dib, *dibs;
953 unsigned dib, distance;
954
955 #if ENABLE_DEBUG_HASHMAP
956 h->debug.put_count++;
957 #endif
958
959 dibs = dib_raw_ptr(h);
960
961 for (distance = 0; ; distance++) {
962 raw_dib = dibs[idx];
963 if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
964 if (raw_dib == DIB_RAW_REHASH)
965 bucket_move_entry(h, swap, idx, IDX_TMP);
966
967 if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
968 h->indirect.idx_lowest_entry = idx;
969
970 bucket_set_dib(h, idx, distance);
971 bucket_move_entry(h, swap, IDX_PUT, idx);
972 if (raw_dib == DIB_RAW_REHASH) {
973 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
974 return true;
975 }
976
977 return false;
978 }
979
980 dib = bucket_calculate_dib(h, idx, raw_dib);
981
982 if (dib < distance) {
983 /* Found a wealthier entry. Go Robin Hood! */
984 bucket_set_dib(h, idx, distance);
985
986 /* swap the entries */
987 bucket_move_entry(h, swap, idx, IDX_TMP);
988 bucket_move_entry(h, swap, IDX_PUT, idx);
989 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
990
991 distance = dib;
992 }
993
994 idx = next_idx(h, idx);
995 }
996 }
997
998 /*
999 * Puts an entry into a hashmap, boldly - no check whether key already exists.
1000 * The caller must place the entry (only its key and value, not link indexes)
1001 * in swap slot IDX_PUT.
1002 * Caller must ensure: the key does not exist yet in the hashmap.
1003 * that resize is not needed if !may_resize.
1004 * Returns: 1 if entry was put successfully.
1005 * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
1006 * Cannot return -ENOMEM if !may_resize.
1007 */
1008 static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
1009 struct swap_entries *swap, bool may_resize) {
1010 struct ordered_hashmap_entry *new_entry;
1011 int r;
1012
1013 assert(idx < n_buckets(h));
1014
1015 new_entry = bucket_at_swap(swap, IDX_PUT);
1016
1017 if (may_resize) {
1018 r = resize_buckets(h, 1);
1019 if (r < 0)
1020 return r;
1021 if (r > 0)
1022 idx = bucket_hash(h, new_entry->p.b.key);
1023 }
1024 assert(n_entries(h) < n_buckets(h));
1025
1026 if (h->type == HASHMAP_TYPE_ORDERED) {
1027 OrderedHashmap *lh = (OrderedHashmap*) h;
1028
1029 new_entry->iterate_next = IDX_NIL;
1030 new_entry->iterate_previous = lh->iterate_list_tail;
1031
1032 if (lh->iterate_list_tail != IDX_NIL) {
1033 struct ordered_hashmap_entry *old_tail;
1034
1035 old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
1036 assert(old_tail->iterate_next == IDX_NIL);
1037 old_tail->iterate_next = IDX_PUT;
1038 }
1039
1040 lh->iterate_list_tail = IDX_PUT;
1041 if (lh->iterate_list_head == IDX_NIL)
1042 lh->iterate_list_head = IDX_PUT;
1043 }
1044
1045 assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
1046
1047 n_entries_inc(h);
1048 #if ENABLE_DEBUG_HASHMAP
1049 h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
1050 #endif
1051
1052 base_set_dirty(h);
1053
1054 return 1;
1055 }
1056 #define hashmap_put_boldly(h, idx, swap, may_resize) \
1057 hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1058
1059 /*
1060 * Returns 0 if resize is not needed.
1061 * 1 if successfully resized.
1062 * -ENOMEM on allocation failure.
1063 */
1064 static int resize_buckets(HashmapBase *h, unsigned entries_add) {
1065 struct swap_entries swap;
1066 void *new_storage;
1067 dib_raw_t *old_dibs, *new_dibs;
1068 const struct hashmap_type_info *hi;
1069 unsigned idx, optimal_idx;
1070 unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
1071 uint8_t new_shift;
1072 bool rehash_next;
1073
1074 assert(h);
1075
1076 hi = &hashmap_type_info[h->type];
1077 new_n_entries = n_entries(h) + entries_add;
1078
1079 /* overflow? */
1080 if (_unlikely_(new_n_entries < entries_add))
1081 return -ENOMEM;
1082
1083 /* For direct storage we allow 100% load, because it's tiny. */
1084 if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
1085 return 0;
1086
1087 /*
1088 * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1089 * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1090 */
1091 new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
1092 /* overflow? */
1093 if (_unlikely_(new_n_buckets < new_n_entries))
1094 return -ENOMEM;
1095
1096 if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
1097 return -ENOMEM;
1098
1099 old_n_buckets = n_buckets(h);
1100
1101 if (_likely_(new_n_buckets <= old_n_buckets))
1102 return 0;
1103
1104 new_shift = log2u_round_up(MAX(
1105 new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
1106 2 * sizeof(struct direct_storage)));
1107
1108 /* Realloc storage (buckets and DIB array). */
1109 new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
1110 1U << new_shift);
1111 if (!new_storage)
1112 return -ENOMEM;
1113
1114 /* Must upgrade direct to indirect storage. */
1115 if (!h->has_indirect) {
1116 memcpy(new_storage, h->direct.storage,
1117 old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
1118 h->indirect.n_entries = h->n_direct_entries;
1119 h->indirect.idx_lowest_entry = 0;
1120 h->n_direct_entries = 0;
1121 }
1122
1123 /* Get a new hash key. If we've just upgraded to indirect storage,
1124 * allow reusing a previously generated key. It's still a different key
1125 * from the shared one that we used for direct storage. */
1126 get_hash_key(h->indirect.hash_key, !h->has_indirect);
1127
1128 h->has_indirect = true;
1129 h->indirect.storage = new_storage;
1130 h->indirect.n_buckets = (1U << new_shift) /
1131 (hi->entry_size + sizeof(dib_raw_t));
1132
1133 old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
1134 new_dibs = dib_raw_ptr(h);
1135
1136 /*
1137 * Move the DIB array to the new place, replacing valid DIB values with
1138 * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1139 * Note: Overlap is not possible, because we have at least doubled the
1140 * number of buckets and dib_raw_t is smaller than any entry type.
1141 */
1142 for (idx = 0; idx < old_n_buckets; idx++) {
1143 assert(old_dibs[idx] != DIB_RAW_REHASH);
1144 new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
1145 : DIB_RAW_REHASH;
1146 }
1147
1148 /* Zero the area of newly added entries (including the old DIB area) */
1149 memzero(bucket_at(h, old_n_buckets),
1150 (n_buckets(h) - old_n_buckets) * hi->entry_size);
1151
1152 /* The upper half of the new DIB array needs initialization */
1153 memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
1154 (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
1155
1156 /* Rehash entries that need it */
1157 n_rehashed = 0;
1158 for (idx = 0; idx < old_n_buckets; idx++) {
1159 if (new_dibs[idx] != DIB_RAW_REHASH)
1160 continue;
1161
1162 optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
1163
1164 /*
1165 * Not much to do if by luck the entry hashes to its current
1166 * location. Just set its DIB.
1167 */
1168 if (optimal_idx == idx) {
1169 new_dibs[idx] = 0;
1170 n_rehashed++;
1171 continue;
1172 }
1173
1174 new_dibs[idx] = DIB_RAW_FREE;
1175 bucket_move_entry(h, &swap, idx, IDX_PUT);
1176 /* bucket_move_entry does not clear the source */
1177 memzero(bucket_at(h, idx), hi->entry_size);
1178
1179 do {
1180 /*
1181 * Find the new bucket for the current entry. This may make
1182 * another entry homeless and load it into IDX_PUT.
1183 */
1184 rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
1185 n_rehashed++;
1186
1187 /* Did the current entry displace another one? */
1188 if (rehash_next)
1189 optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
1190 } while (rehash_next);
1191 }
1192
1193 assert(n_rehashed == n_entries(h));
1194
1195 return 1;
1196 }
1197
1198 /*
1199 * Finds an entry with a matching key
1200 * Returns: index of the found entry, or IDX_NIL if not found.
1201 */
1202 static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
1203 struct hashmap_base_entry *e;
1204 unsigned dib, distance;
1205 dib_raw_t *dibs = dib_raw_ptr(h);
1206
1207 assert(idx < n_buckets(h));
1208
1209 for (distance = 0; ; distance++) {
1210 if (dibs[idx] == DIB_RAW_FREE)
1211 return IDX_NIL;
1212
1213 dib = bucket_calculate_dib(h, idx, dibs[idx]);
1214
1215 if (dib < distance)
1216 return IDX_NIL;
1217 if (dib == distance) {
1218 e = bucket_at(h, idx);
1219 if (h->hash_ops->compare(e->key, key) == 0)
1220 return idx;
1221 }
1222
1223 idx = next_idx(h, idx);
1224 }
1225 }
1226 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
1227
1228 int hashmap_put(Hashmap *h, const void *key, void *value) {
1229 struct swap_entries swap;
1230 struct plain_hashmap_entry *e;
1231 unsigned hash, idx;
1232
1233 assert(h);
1234
1235 hash = bucket_hash(h, key);
1236 idx = bucket_scan(h, hash, key);
1237 if (idx != IDX_NIL) {
1238 e = plain_bucket_at(h, idx);
1239 if (e->value == value)
1240 return 0;
1241 return -EEXIST;
1242 }
1243
1244 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1245 e->b.key = key;
1246 e->value = value;
1247 return hashmap_put_boldly(h, hash, &swap, true);
1248 }
1249
1250 int set_put(Set *s, const void *key) {
1251 struct swap_entries swap;
1252 struct hashmap_base_entry *e;
1253 unsigned hash, idx;
1254
1255 assert(s);
1256
1257 hash = bucket_hash(s, key);
1258 idx = bucket_scan(s, hash, key);
1259 if (idx != IDX_NIL)
1260 return 0;
1261
1262 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1263 e->key = key;
1264 return hashmap_put_boldly(s, hash, &swap, true);
1265 }
1266
1267 int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS) {
1268 int r;
1269
1270 r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS);
1271 if (r < 0)
1272 return r;
1273
1274 return set_put(*s, key);
1275 }
1276
1277 int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS) {
1278 int r;
1279
1280 r = _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_PASS_ARGS);
1281 if (r <= 0) {
1282 if (hash_ops && hash_ops->free_key)
1283 hash_ops->free_key(key);
1284 else
1285 free(key);
1286 }
1287
1288 return r;
1289 }
1290
1291 int hashmap_replace(Hashmap *h, const void *key, void *value) {
1292 struct swap_entries swap;
1293 struct plain_hashmap_entry *e;
1294 unsigned hash, idx;
1295
1296 assert(h);
1297
1298 hash = bucket_hash(h, key);
1299 idx = bucket_scan(h, hash, key);
1300 if (idx != IDX_NIL) {
1301 e = plain_bucket_at(h, idx);
1302 #if ENABLE_DEBUG_HASHMAP
1303 /* Although the key is equal, the key pointer may have changed,
1304 * and this would break our assumption for iterating. So count
1305 * this operation as incompatible with iteration. */
1306 if (e->b.key != key) {
1307 h->b.debug.put_count++;
1308 h->b.debug.rem_count++;
1309 h->b.debug.last_rem_idx = idx;
1310 }
1311 #endif
1312 e->b.key = key;
1313 e->value = value;
1314 hashmap_set_dirty(h);
1315
1316 return 0;
1317 }
1318
1319 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1320 e->b.key = key;
1321 e->value = value;
1322 return hashmap_put_boldly(h, hash, &swap, true);
1323 }
1324
1325 int hashmap_update(Hashmap *h, const void *key, void *value) {
1326 struct plain_hashmap_entry *e;
1327 unsigned hash, idx;
1328
1329 assert(h);
1330
1331 hash = bucket_hash(h, key);
1332 idx = bucket_scan(h, hash, key);
1333 if (idx == IDX_NIL)
1334 return -ENOENT;
1335
1336 e = plain_bucket_at(h, idx);
1337 e->value = value;
1338 hashmap_set_dirty(h);
1339
1340 return 0;
1341 }
1342
1343 void* _hashmap_get(HashmapBase *h, const void *key) {
1344 struct hashmap_base_entry *e;
1345 unsigned hash, idx;
1346
1347 if (!h)
1348 return NULL;
1349
1350 hash = bucket_hash(h, key);
1351 idx = bucket_scan(h, hash, key);
1352 if (idx == IDX_NIL)
1353 return NULL;
1354
1355 e = bucket_at(h, idx);
1356 return entry_value(h, e);
1357 }
1358
1359 void* hashmap_get2(Hashmap *h, const void *key, void **key2) {
1360 struct plain_hashmap_entry *e;
1361 unsigned hash, idx;
1362
1363 if (!h)
1364 return NULL;
1365
1366 hash = bucket_hash(h, key);
1367 idx = bucket_scan(h, hash, key);
1368 if (idx == IDX_NIL)
1369 return NULL;
1370
1371 e = plain_bucket_at(h, idx);
1372 if (key2)
1373 *key2 = (void*) e->b.key;
1374
1375 return e->value;
1376 }
1377
1378 bool _hashmap_contains(HashmapBase *h, const void *key) {
1379 unsigned hash;
1380
1381 if (!h)
1382 return false;
1383
1384 hash = bucket_hash(h, key);
1385 return bucket_scan(h, hash, key) != IDX_NIL;
1386 }
1387
1388 void* _hashmap_remove(HashmapBase *h, const void *key) {
1389 struct hashmap_base_entry *e;
1390 unsigned hash, idx;
1391 void *data;
1392
1393 if (!h)
1394 return NULL;
1395
1396 hash = bucket_hash(h, key);
1397 idx = bucket_scan(h, hash, key);
1398 if (idx == IDX_NIL)
1399 return NULL;
1400
1401 e = bucket_at(h, idx);
1402 data = entry_value(h, e);
1403 remove_entry(h, idx);
1404
1405 return data;
1406 }
1407
1408 void* hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
1409 struct plain_hashmap_entry *e;
1410 unsigned hash, idx;
1411 void *data;
1412
1413 if (!h) {
1414 if (rkey)
1415 *rkey = NULL;
1416 return NULL;
1417 }
1418
1419 hash = bucket_hash(h, key);
1420 idx = bucket_scan(h, hash, key);
1421 if (idx == IDX_NIL) {
1422 if (rkey)
1423 *rkey = NULL;
1424 return NULL;
1425 }
1426
1427 e = plain_bucket_at(h, idx);
1428 data = e->value;
1429 if (rkey)
1430 *rkey = (void*) e->b.key;
1431
1432 remove_entry(h, idx);
1433
1434 return data;
1435 }
1436
1437 int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1438 struct swap_entries swap;
1439 struct plain_hashmap_entry *e;
1440 unsigned old_hash, new_hash, idx;
1441
1442 if (!h)
1443 return -ENOENT;
1444
1445 old_hash = bucket_hash(h, old_key);
1446 idx = bucket_scan(h, old_hash, old_key);
1447 if (idx == IDX_NIL)
1448 return -ENOENT;
1449
1450 new_hash = bucket_hash(h, new_key);
1451 if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
1452 return -EEXIST;
1453
1454 remove_entry(h, idx);
1455
1456 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1457 e->b.key = new_key;
1458 e->value = value;
1459 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1460
1461 return 0;
1462 }
1463
1464 int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
1465 struct swap_entries swap;
1466 struct hashmap_base_entry *e;
1467 unsigned old_hash, new_hash, idx;
1468
1469 if (!s)
1470 return -ENOENT;
1471
1472 old_hash = bucket_hash(s, old_key);
1473 idx = bucket_scan(s, old_hash, old_key);
1474 if (idx == IDX_NIL)
1475 return -ENOENT;
1476
1477 new_hash = bucket_hash(s, new_key);
1478 if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
1479 return -EEXIST;
1480
1481 remove_entry(s, idx);
1482
1483 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1484 e->key = new_key;
1485 assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
1486
1487 return 0;
1488 }
1489
1490 int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1491 struct swap_entries swap;
1492 struct plain_hashmap_entry *e;
1493 unsigned old_hash, new_hash, idx_old, idx_new;
1494
1495 if (!h)
1496 return -ENOENT;
1497
1498 old_hash = bucket_hash(h, old_key);
1499 idx_old = bucket_scan(h, old_hash, old_key);
1500 if (idx_old == IDX_NIL)
1501 return -ENOENT;
1502
1503 old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
1504
1505 new_hash = bucket_hash(h, new_key);
1506 idx_new = bucket_scan(h, new_hash, new_key);
1507 if (idx_new != IDX_NIL)
1508 if (idx_old != idx_new) {
1509 remove_entry(h, idx_new);
1510 /* Compensate for a possible backward shift. */
1511 if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
1512 idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
1513 assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
1514 }
1515
1516 remove_entry(h, idx_old);
1517
1518 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1519 e->b.key = new_key;
1520 e->value = value;
1521 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1522
1523 return 0;
1524 }
1525
1526 void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
1527 struct hashmap_base_entry *e;
1528 unsigned hash, idx;
1529
1530 if (!h)
1531 return NULL;
1532
1533 hash = bucket_hash(h, key);
1534 idx = bucket_scan(h, hash, key);
1535 if (idx == IDX_NIL)
1536 return NULL;
1537
1538 e = bucket_at(h, idx);
1539 if (entry_value(h, e) != value)
1540 return NULL;
1541
1542 remove_entry(h, idx);
1543
1544 return value;
1545 }
1546
1547 static unsigned find_first_entry(HashmapBase *h) {
1548 Iterator i = ITERATOR_FIRST;
1549
1550 if (!h || !n_entries(h))
1551 return IDX_NIL;
1552
1553 return hashmap_iterate_entry(h, &i);
1554 }
1555
1556 void* _hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
1557 struct hashmap_base_entry *e;
1558 void *key, *data;
1559 unsigned idx;
1560
1561 idx = find_first_entry(h);
1562 if (idx == IDX_NIL) {
1563 if (ret_key)
1564 *ret_key = NULL;
1565 return NULL;
1566 }
1567
1568 e = bucket_at(h, idx);
1569 key = (void*) e->key;
1570 data = entry_value(h, e);
1571
1572 if (remove)
1573 remove_entry(h, idx);
1574
1575 if (ret_key)
1576 *ret_key = key;
1577
1578 return data;
1579 }
1580
1581 unsigned _hashmap_size(HashmapBase *h) {
1582 if (!h)
1583 return 0;
1584
1585 return n_entries(h);
1586 }
1587
1588 unsigned _hashmap_buckets(HashmapBase *h) {
1589 if (!h)
1590 return 0;
1591
1592 return n_buckets(h);
1593 }
1594
1595 int _hashmap_merge(Hashmap *h, Hashmap *other) {
1596 Iterator i;
1597 unsigned idx;
1598
1599 assert(h);
1600
1601 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1602 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
1603 int r;
1604
1605 r = hashmap_put(h, pe->b.key, pe->value);
1606 if (r < 0 && r != -EEXIST)
1607 return r;
1608 }
1609
1610 return 0;
1611 }
1612
1613 int set_merge(Set *s, Set *other) {
1614 Iterator i;
1615 unsigned idx;
1616
1617 assert(s);
1618
1619 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1620 struct set_entry *se = set_bucket_at(other, idx);
1621 int r;
1622
1623 r = set_put(s, se->b.key);
1624 if (r < 0)
1625 return r;
1626 }
1627
1628 return 0;
1629 }
1630
1631 int _hashmap_reserve(HashmapBase *h, unsigned entries_add) {
1632 int r;
1633
1634 assert(h);
1635
1636 r = resize_buckets(h, entries_add);
1637 if (r < 0)
1638 return r;
1639
1640 return 0;
1641 }
1642
1643 /*
1644 * The same as hashmap_merge(), but every new item from other is moved to h.
1645 * Keys already in h are skipped and stay in other.
1646 * Returns: 0 on success.
1647 * -ENOMEM on alloc failure, in which case no move has been done.
1648 */
1649 int _hashmap_move(HashmapBase *h, HashmapBase *other) {
1650 struct swap_entries swap;
1651 struct hashmap_base_entry *e, *n;
1652 Iterator i;
1653 unsigned idx;
1654 int r;
1655
1656 assert(h);
1657
1658 if (!other)
1659 return 0;
1660
1661 assert(other->type == h->type);
1662
1663 /*
1664 * This reserves buckets for the worst case, where none of other's
1665 * entries are yet present in h. This is preferable to risking
1666 * an allocation failure in the middle of the moving and having to
1667 * rollback or return a partial result.
1668 */
1669 r = resize_buckets(h, n_entries(other));
1670 if (r < 0)
1671 return r;
1672
1673 HASHMAP_FOREACH_IDX(idx, other, i) {
1674 unsigned h_hash;
1675
1676 e = bucket_at(other, idx);
1677 h_hash = bucket_hash(h, e->key);
1678 if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
1679 continue;
1680
1681 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1682 n->key = e->key;
1683 if (h->type != HASHMAP_TYPE_SET)
1684 ((struct plain_hashmap_entry*) n)->value =
1685 ((struct plain_hashmap_entry*) e)->value;
1686 assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
1687
1688 remove_entry(other, idx);
1689 }
1690
1691 return 0;
1692 }
1693
1694 int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
1695 struct swap_entries swap;
1696 unsigned h_hash, other_hash, idx;
1697 struct hashmap_base_entry *e, *n;
1698 int r;
1699
1700 assert(h);
1701
1702 h_hash = bucket_hash(h, key);
1703 if (bucket_scan(h, h_hash, key) != IDX_NIL)
1704 return -EEXIST;
1705
1706 if (!other)
1707 return -ENOENT;
1708
1709 assert(other->type == h->type);
1710
1711 other_hash = bucket_hash(other, key);
1712 idx = bucket_scan(other, other_hash, key);
1713 if (idx == IDX_NIL)
1714 return -ENOENT;
1715
1716 e = bucket_at(other, idx);
1717
1718 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1719 n->key = e->key;
1720 if (h->type != HASHMAP_TYPE_SET)
1721 ((struct plain_hashmap_entry*) n)->value =
1722 ((struct plain_hashmap_entry*) e)->value;
1723 r = hashmap_put_boldly(h, h_hash, &swap, true);
1724 if (r < 0)
1725 return r;
1726
1727 remove_entry(other, idx);
1728 return 0;
1729 }
1730
1731 HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS) {
1732 HashmapBase *copy;
1733 int r;
1734
1735 assert(h);
1736
1737 copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_PASS_ARGS);
1738 if (!copy)
1739 return NULL;
1740
1741 switch (h->type) {
1742 case HASHMAP_TYPE_PLAIN:
1743 case HASHMAP_TYPE_ORDERED:
1744 r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
1745 break;
1746 case HASHMAP_TYPE_SET:
1747 r = set_merge((Set*)copy, (Set*)h);
1748 break;
1749 default:
1750 assert_not_reached();
1751 }
1752
1753 if (r < 0)
1754 return _hashmap_free(copy, false, false);
1755
1756 return copy;
1757 }
1758
1759 char** _hashmap_get_strv(HashmapBase *h) {
1760 char **sv;
1761 Iterator i;
1762 unsigned idx, n;
1763
1764 if (!h)
1765 return new0(char*, 1);
1766
1767 sv = new(char*, n_entries(h)+1);
1768 if (!sv)
1769 return NULL;
1770
1771 n = 0;
1772 HASHMAP_FOREACH_IDX(idx, h, i)
1773 sv[n++] = entry_value(h, bucket_at(h, idx));
1774 sv[n] = NULL;
1775
1776 return sv;
1777 }
1778
1779 void* ordered_hashmap_next(OrderedHashmap *h, const void *key) {
1780 struct ordered_hashmap_entry *e;
1781 unsigned hash, idx;
1782
1783 if (!h)
1784 return NULL;
1785
1786 hash = bucket_hash(h, key);
1787 idx = bucket_scan(h, hash, key);
1788 if (idx == IDX_NIL)
1789 return NULL;
1790
1791 e = ordered_bucket_at(h, idx);
1792 if (e->iterate_next == IDX_NIL)
1793 return NULL;
1794 return ordered_bucket_at(h, e->iterate_next)->p.value;
1795 }
1796
1797 int set_consume(Set *s, void *value) {
1798 int r;
1799
1800 assert(s);
1801 assert(value);
1802
1803 r = set_put(s, value);
1804 if (r <= 0)
1805 free(value);
1806
1807 return r;
1808 }
1809
1810 int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS) {
1811 int r;
1812
1813 r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS);
1814 if (r < 0)
1815 return r;
1816
1817 _cleanup_free_ char *kdup = NULL, *vdup = NULL;
1818
1819 kdup = strdup(k);
1820 if (!kdup)
1821 return -ENOMEM;
1822
1823 if (v) {
1824 vdup = strdup(v);
1825 if (!vdup)
1826 return -ENOMEM;
1827 }
1828
1829 r = hashmap_put(*h, kdup, vdup);
1830 if (r < 0) {
1831 if (r == -EEXIST && streq_ptr(v, hashmap_get(*h, kdup)))
1832 return 0;
1833 return r;
1834 }
1835
1836 /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */
1837 assert(vdup == NULL || r > 0);
1838 if (r > 0)
1839 kdup = vdup = NULL;
1840
1841 return r;
1842 }
1843
1844 int _set_put_strdup_full(Set **s, const struct hash_ops *hash_ops, const char *p HASHMAP_DEBUG_PARAMS) {
1845 char *c;
1846 int r;
1847
1848 assert(s);
1849 assert(p);
1850
1851 r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS);
1852 if (r < 0)
1853 return r;
1854
1855 if (set_contains(*s, (char*) p))
1856 return 0;
1857
1858 c = strdup(p);
1859 if (!c)
1860 return -ENOMEM;
1861
1862 return set_consume(*s, c);
1863 }
1864
1865 int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS) {
1866 int n = 0, r;
1867
1868 assert(s);
1869
1870 STRV_FOREACH(i, l) {
1871 r = _set_put_strdup_full(s, hash_ops, *i HASHMAP_DEBUG_PASS_ARGS);
1872 if (r < 0)
1873 return r;
1874
1875 n += r;
1876 }
1877
1878 return n;
1879 }
1880
1881 int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
1882 const char *p = v;
1883 int r;
1884
1885 assert(s);
1886 assert(v);
1887
1888 for (;;) {
1889 char *word;
1890
1891 r = extract_first_word(&p, &word, separators, flags);
1892 if (r <= 0)
1893 return r;
1894
1895 r = set_consume(s, word);
1896 if (r < 0)
1897 return r;
1898 }
1899 }
1900
1901 /* expand the cachemem if needed, return true if newly (re)activated. */
1902 static int cachemem_maintain(CacheMem *mem, size_t size) {
1903 assert(mem);
1904
1905 if (!GREEDY_REALLOC(mem->ptr, size)) {
1906 if (size > 0)
1907 return -ENOMEM;
1908 }
1909
1910 if (!mem->active) {
1911 mem->active = true;
1912 return true;
1913 }
1914
1915 return false;
1916 }
1917
1918 int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
1919 bool sync_keys = false, sync_values = false;
1920 size_t size;
1921 int r;
1922
1923 assert(cache);
1924 assert(cache->hashmap);
1925
1926 size = n_entries(cache->hashmap);
1927
1928 if (res_keys) {
1929 r = cachemem_maintain(&cache->keys, size);
1930 if (r < 0)
1931 return r;
1932
1933 sync_keys = r;
1934 } else
1935 cache->keys.active = false;
1936
1937 if (res_values) {
1938 r = cachemem_maintain(&cache->values, size);
1939 if (r < 0)
1940 return r;
1941
1942 sync_values = r;
1943 } else
1944 cache->values.active = false;
1945
1946 if (cache->hashmap->dirty) {
1947 if (cache->keys.active)
1948 sync_keys = true;
1949 if (cache->values.active)
1950 sync_values = true;
1951
1952 cache->hashmap->dirty = false;
1953 }
1954
1955 if (sync_keys || sync_values) {
1956 unsigned i, idx;
1957 Iterator iter;
1958
1959 i = 0;
1960 HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
1961 struct hashmap_base_entry *e;
1962
1963 e = bucket_at(cache->hashmap, idx);
1964
1965 if (sync_keys)
1966 cache->keys.ptr[i] = e->key;
1967 if (sync_values)
1968 cache->values.ptr[i] = entry_value(cache->hashmap, e);
1969 i++;
1970 }
1971 }
1972
1973 if (res_keys)
1974 *res_keys = cache->keys.ptr;
1975 if (res_values)
1976 *res_values = cache->values.ptr;
1977 if (res_n_entries)
1978 *res_n_entries = size;
1979
1980 return 0;
1981 }
1982
1983 IteratedCache* iterated_cache_free(IteratedCache *cache) {
1984 if (cache) {
1985 free(cache->keys.ptr);
1986 free(cache->values.ptr);
1987 }
1988
1989 return mfree(cache);
1990 }
1991
1992 int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret) {
1993 _cleanup_free_ char *str = NULL;
1994 size_t separator_len, len = 0;
1995 const char *value;
1996 bool first;
1997
1998 assert(ret);
1999
2000 if (set_isempty(s)) {
2001 *ret = NULL;
2002 return 0;
2003 }
2004
2005 separator_len = strlen_ptr(separator);
2006
2007 if (separator_len == 0)
2008 wrap_with_separator = false;
2009
2010 first = !wrap_with_separator;
2011
2012 SET_FOREACH(value, s) {
2013 size_t l = strlen_ptr(value);
2014
2015 if (l == 0)
2016 continue;
2017
2018 if (!GREEDY_REALLOC(str, len + l + (first ? 0 : separator_len) + (wrap_with_separator ? separator_len : 0) + 1))
2019 return -ENOMEM;
2020
2021 if (separator_len > 0 && !first) {
2022 memcpy(str + len, separator, separator_len);
2023 len += separator_len;
2024 }
2025
2026 memcpy(str + len, value, l);
2027 len += l;
2028 first = false;
2029 }
2030
2031 if (wrap_with_separator) {
2032 memcpy(str + len, separator, separator_len);
2033 len += separator_len;
2034 }
2035
2036 str[len] = '\0';
2037
2038 *ret = TAKE_PTR(str);
2039 return 0;
2040 }
2041
2042 bool set_equal(Set *a, Set *b) {
2043 void *p;
2044
2045 /* Checks whether each entry of 'a' is also in 'b' and vice versa, i.e. the two sets contain the same
2046 * entries */
2047
2048 if (a == b)
2049 return true;
2050
2051 if (set_isempty(a) && set_isempty(b))
2052 return true;
2053
2054 if (set_size(a) != set_size(b)) /* Cheap check that hopefully catches a lot of inequality cases
2055 * already */
2056 return false;
2057
2058 SET_FOREACH(p, a)
2059 if (!set_contains(b, p))
2060 return false;
2061
2062 /* If we have the same hashops, then we don't need to check things backwards given we compared the
2063 * size and that all of a is in b. */
2064 if (a->b.hash_ops == b->b.hash_ops)
2065 return true;
2066
2067 SET_FOREACH(p, b)
2068 if (!set_contains(a, p))
2069 return false;
2070
2071 return true;
2072 }