]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/hashmap.c
hashmap: don't allow hashmap_type_info table to be optimized away
[thirdparty/systemd.git] / src / basic / hashmap.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdint.h>
5 #include <stdlib.h>
6
7 #include "alloc-util.h"
8 #include "fileio.h"
9 #include "hashmap.h"
10 #include "macro.h"
11 #include "memory-util.h"
12 #include "mempool.h"
13 #include "missing_syscall.h"
14 #include "process-util.h"
15 #include "random-util.h"
16 #include "set.h"
17 #include "siphash24.h"
18 #include "string-util.h"
19 #include "strv.h"
20
21 #if ENABLE_DEBUG_HASHMAP
22 #include <pthread.h>
23 #include "list.h"
24 #endif
25
26 /*
27 * Implementation of hashmaps.
28 * Addressing: open
29 * - uses less RAM compared to closed addressing (chaining), because
30 * our entries are small (especially in Sets, which tend to contain
31 * the majority of entries in systemd).
32 * Collision resolution: Robin Hood
33 * - tends to equalize displacement of entries from their optimal buckets.
34 * Probe sequence: linear
35 * - though theoretically worse than random probing/uniform hashing/double
36 * hashing, it is good for cache locality.
37 *
38 * References:
39 * Celis, P. 1986. Robin Hood Hashing.
40 * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
41 * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
42 * - The results are derived for random probing. Suggests deletion with
43 * tombstones and two mean-centered search methods. None of that works
44 * well for linear probing.
45 *
46 * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
47 * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
48 * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
49 * http://www.math.uu.se/~svante/papers/sj157.pdf
50 * - Applies to Robin Hood with linear probing. Contains remarks on
51 * the unsuitability of mean-centered search with linear probing.
52 *
53 * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
54 * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
55 * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
56 * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
57 * in a successful search), and Janson writes about displacement. C = d + 1.
58 *
59 * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
60 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
61 * - Explanation of backward shift deletion with pictures.
62 *
63 * Khuong, P. 2013. The Other Robin Hood Hashing.
64 * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
65 * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
66 */
67
68 /*
69 * XXX Ideas for improvement:
70 * For unordered hashmaps, randomize iteration order, similarly to Perl:
71 * http://blog.booking.com/hardening-perls-hash-function.html
72 */
73
74 /* INV_KEEP_FREE = 1 / (1 - max_load_factor)
75 * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
76 #define INV_KEEP_FREE 5U
77
78 /* Fields common to entries of all hashmap/set types */
79 struct hashmap_base_entry {
80 const void *key;
81 };
82
83 /* Entry types for specific hashmap/set types
84 * hashmap_base_entry must be at the beginning of each entry struct. */
85
86 struct plain_hashmap_entry {
87 struct hashmap_base_entry b;
88 void *value;
89 };
90
91 struct ordered_hashmap_entry {
92 struct plain_hashmap_entry p;
93 unsigned iterate_next, iterate_previous;
94 };
95
96 struct set_entry {
97 struct hashmap_base_entry b;
98 };
99
100 /* In several functions it is advantageous to have the hash table extended
101 * virtually by a couple of additional buckets. We reserve special index values
102 * for these "swap" buckets. */
103 #define _IDX_SWAP_BEGIN (UINT_MAX - 3)
104 #define IDX_PUT (_IDX_SWAP_BEGIN + 0)
105 #define IDX_TMP (_IDX_SWAP_BEGIN + 1)
106 #define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
107
108 #define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
109 #define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
110
111 assert_cc(IDX_FIRST == _IDX_SWAP_END);
112 assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
113
114 /* Storage space for the "swap" buckets.
115 * All entry types can fit into a ordered_hashmap_entry. */
116 struct swap_entries {
117 struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
118 };
119
120 /* Distance from Initial Bucket */
121 typedef uint8_t dib_raw_t;
122 #define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
123 #define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
124 #define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
125 #define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
126
127 #define DIB_FREE UINT_MAX
128
129 #if ENABLE_DEBUG_HASHMAP
130 struct hashmap_debug_info {
131 LIST_FIELDS(struct hashmap_debug_info, debug_list);
132 unsigned max_entries; /* high watermark of n_entries */
133
134 /* who allocated this hashmap */
135 int line;
136 const char *file;
137 const char *func;
138
139 /* fields to detect modification while iterating */
140 unsigned put_count; /* counts puts into the hashmap */
141 unsigned rem_count; /* counts removals from hashmap */
142 unsigned last_rem_idx; /* remembers last removal index */
143 };
144
145 /* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
146 static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
147 static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
148
149 #define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
150
151 #else /* !ENABLE_DEBUG_HASHMAP */
152 #define HASHMAP_DEBUG_FIELDS
153 #endif /* ENABLE_DEBUG_HASHMAP */
154
155 enum HashmapType {
156 HASHMAP_TYPE_PLAIN,
157 HASHMAP_TYPE_ORDERED,
158 HASHMAP_TYPE_SET,
159 _HASHMAP_TYPE_MAX
160 };
161
162 struct _packed_ indirect_storage {
163 void *storage; /* where buckets and DIBs are stored */
164 uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
165
166 unsigned n_entries; /* number of stored entries */
167 unsigned n_buckets; /* number of buckets */
168
169 unsigned idx_lowest_entry; /* Index below which all buckets are free.
170 Makes "while(hashmap_steal_first())" loops
171 O(n) instead of O(n^2) for unordered hashmaps. */
172 uint8_t _pad[3]; /* padding for the whole HashmapBase */
173 /* The bitfields in HashmapBase complete the alignment of the whole thing. */
174 };
175
176 struct direct_storage {
177 /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
178 * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
179 * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
180 uint8_t storage[sizeof(struct indirect_storage)];
181 };
182
183 #define DIRECT_BUCKETS(entry_t) \
184 (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
185
186 /* We should be able to store at least one entry directly. */
187 assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
188
189 /* We have 3 bits for n_direct_entries. */
190 assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
191
192 /* Hashmaps with directly stored entries all use this shared hash key.
193 * It's no big deal if the key is guessed, because there can be only
194 * a handful of directly stored entries in a hashmap. When a hashmap
195 * outgrows direct storage, it gets its own key for indirect storage. */
196 static uint8_t shared_hash_key[HASH_KEY_SIZE];
197 static bool shared_hash_key_initialized;
198
199 /* Fields that all hashmap/set types must have */
200 struct HashmapBase {
201 const struct hash_ops *hash_ops; /* hash and compare ops to use */
202
203 union _packed_ {
204 struct indirect_storage indirect; /* if has_indirect */
205 struct direct_storage direct; /* if !has_indirect */
206 };
207
208 enum HashmapType type:2; /* HASHMAP_TYPE_* */
209 bool has_indirect:1; /* whether indirect storage is used */
210 unsigned n_direct_entries:3; /* Number of entries in direct storage.
211 * Only valid if !has_indirect. */
212 bool from_pool:1; /* whether was allocated from mempool */
213 bool dirty:1; /* whether dirtied since last iterated_cache_get() */
214 bool cached:1; /* whether this hashmap is being cached */
215 HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */
216 };
217
218 /* Specific hash types
219 * HashmapBase must be at the beginning of each hashmap struct. */
220
221 struct Hashmap {
222 struct HashmapBase b;
223 };
224
225 struct OrderedHashmap {
226 struct HashmapBase b;
227 unsigned iterate_list_head, iterate_list_tail;
228 };
229
230 struct Set {
231 struct HashmapBase b;
232 };
233
234 typedef struct CacheMem {
235 const void **ptr;
236 size_t n_populated, n_allocated;
237 bool active:1;
238 } CacheMem;
239
240 struct IteratedCache {
241 HashmapBase *hashmap;
242 CacheMem keys, values;
243 };
244
245 DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
246 DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
247 /* No need for a separate Set pool */
248 assert_cc(sizeof(Hashmap) == sizeof(Set));
249
250 struct hashmap_type_info {
251 size_t head_size;
252 size_t entry_size;
253 struct mempool *mempool;
254 unsigned n_direct_buckets;
255 };
256
257 static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
258 [HASHMAP_TYPE_PLAIN] = {
259 .head_size = sizeof(Hashmap),
260 .entry_size = sizeof(struct plain_hashmap_entry),
261 .mempool = &hashmap_pool,
262 .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
263 },
264 [HASHMAP_TYPE_ORDERED] = {
265 .head_size = sizeof(OrderedHashmap),
266 .entry_size = sizeof(struct ordered_hashmap_entry),
267 .mempool = &ordered_hashmap_pool,
268 .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
269 },
270 [HASHMAP_TYPE_SET] = {
271 .head_size = sizeof(Set),
272 .entry_size = sizeof(struct set_entry),
273 .mempool = &hashmap_pool,
274 .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
275 },
276 };
277
278 #if VALGRIND
279 _destructor_ static void cleanup_pools(void) {
280 _cleanup_free_ char *t = NULL;
281 int r;
282
283 /* Be nice to valgrind */
284
285 /* The pool is only allocated by the main thread, but the memory can
286 * be passed to other threads. Let's clean up if we are the main thread
287 * and no other threads are live. */
288 /* We build our own is_main_thread() here, which doesn't use C11
289 * TLS based caching of the result. That's because valgrind apparently
290 * doesn't like malloc() (which C11 TLS internally uses) to be called
291 * from a GCC destructors. */
292 if (getpid() != gettid())
293 return;
294
295 r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t);
296 if (r < 0 || !streq(t, "1"))
297 return;
298
299 mempool_drop(&hashmap_pool);
300 mempool_drop(&ordered_hashmap_pool);
301 }
302 #endif
303
304 static unsigned n_buckets(HashmapBase *h) {
305 return h->has_indirect ? h->indirect.n_buckets
306 : hashmap_type_info[h->type].n_direct_buckets;
307 }
308
309 static unsigned n_entries(HashmapBase *h) {
310 return h->has_indirect ? h->indirect.n_entries
311 : h->n_direct_entries;
312 }
313
314 static void n_entries_inc(HashmapBase *h) {
315 if (h->has_indirect)
316 h->indirect.n_entries++;
317 else
318 h->n_direct_entries++;
319 }
320
321 static void n_entries_dec(HashmapBase *h) {
322 if (h->has_indirect)
323 h->indirect.n_entries--;
324 else
325 h->n_direct_entries--;
326 }
327
328 static void *storage_ptr(HashmapBase *h) {
329 return h->has_indirect ? h->indirect.storage
330 : h->direct.storage;
331 }
332
333 static uint8_t *hash_key(HashmapBase *h) {
334 return h->has_indirect ? h->indirect.hash_key
335 : shared_hash_key;
336 }
337
338 static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
339 struct siphash state;
340 uint64_t hash;
341
342 siphash24_init(&state, hash_key(h));
343
344 h->hash_ops->hash(p, &state);
345
346 hash = siphash24_finalize(&state);
347
348 return (unsigned) (hash % n_buckets(h));
349 }
350 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
351
352 static void base_set_dirty(HashmapBase *h) {
353 h->dirty = true;
354 }
355 #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
356
357 static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
358 static uint8_t current[HASH_KEY_SIZE];
359 static bool current_initialized = false;
360
361 /* Returns a hash function key to use. In order to keep things
362 * fast we will not generate a new key each time we allocate a
363 * new hash table. Instead, we'll just reuse the most recently
364 * generated one, except if we never generated one or when we
365 * are rehashing an entire hash table because we reached a
366 * fill level */
367
368 if (!current_initialized || !reuse_is_ok) {
369 random_bytes(current, sizeof(current));
370 current_initialized = true;
371 }
372
373 memcpy(hash_key, current, sizeof(current));
374 }
375
376 static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
377 return (struct hashmap_base_entry*)
378 ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
379 }
380
381 static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
382 return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
383 }
384
385 static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
386 return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
387 }
388
389 static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
390 return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
391 }
392
393 static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
394 return &swap->e[idx - _IDX_SWAP_BEGIN];
395 }
396
397 /* Returns a pointer to the bucket at index idx.
398 * Understands real indexes and swap indexes, hence "_virtual". */
399 static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
400 unsigned idx) {
401 if (idx < _IDX_SWAP_BEGIN)
402 return bucket_at(h, idx);
403
404 if (idx < _IDX_SWAP_END)
405 return &bucket_at_swap(swap, idx)->p.b;
406
407 assert_not_reached("Invalid index");
408 }
409
410 static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
411 return (dib_raw_t*)
412 ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
413 }
414
415 static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
416 return idx >= from ? idx - from
417 : n_buckets(h) + idx - from;
418 }
419
420 static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
421 unsigned initial_bucket;
422
423 if (raw_dib == DIB_RAW_FREE)
424 return DIB_FREE;
425
426 if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
427 return raw_dib;
428
429 /*
430 * Having an overflow DIB value is very unlikely. The hash function
431 * would have to be bad. For example, in a table of size 2^24 filled
432 * to load factor 0.9 the maximum observed DIB is only about 60.
433 * In theory (assuming I used Maxima correctly), for an infinite size
434 * hash table with load factor 0.8 the probability of a given entry
435 * having DIB > 40 is 1.9e-8.
436 * This returns the correct DIB value by recomputing the hash value in
437 * the unlikely case. XXX Hitting this case could be a hint to rehash.
438 */
439 initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
440 return bucket_distance(h, idx, initial_bucket);
441 }
442
443 static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
444 dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
445 }
446
447 static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
448 dib_raw_t *dibs;
449
450 dibs = dib_raw_ptr(h);
451
452 for ( ; idx < n_buckets(h); idx++)
453 if (dibs[idx] != DIB_RAW_FREE)
454 return idx;
455
456 return IDX_NIL;
457 }
458
459 static void bucket_mark_free(HashmapBase *h, unsigned idx) {
460 memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
461 bucket_set_dib(h, idx, DIB_FREE);
462 }
463
464 static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
465 unsigned from, unsigned to) {
466 struct hashmap_base_entry *e_from, *e_to;
467
468 assert(from != to);
469
470 e_from = bucket_at_virtual(h, swap, from);
471 e_to = bucket_at_virtual(h, swap, to);
472
473 memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
474
475 if (h->type == HASHMAP_TYPE_ORDERED) {
476 OrderedHashmap *lh = (OrderedHashmap*) h;
477 struct ordered_hashmap_entry *le, *le_to;
478
479 le_to = (struct ordered_hashmap_entry*) e_to;
480
481 if (le_to->iterate_next != IDX_NIL) {
482 le = (struct ordered_hashmap_entry*)
483 bucket_at_virtual(h, swap, le_to->iterate_next);
484 le->iterate_previous = to;
485 }
486
487 if (le_to->iterate_previous != IDX_NIL) {
488 le = (struct ordered_hashmap_entry*)
489 bucket_at_virtual(h, swap, le_to->iterate_previous);
490 le->iterate_next = to;
491 }
492
493 if (lh->iterate_list_head == from)
494 lh->iterate_list_head = to;
495 if (lh->iterate_list_tail == from)
496 lh->iterate_list_tail = to;
497 }
498 }
499
500 static unsigned next_idx(HashmapBase *h, unsigned idx) {
501 return (idx + 1U) % n_buckets(h);
502 }
503
504 static unsigned prev_idx(HashmapBase *h, unsigned idx) {
505 return (n_buckets(h) + idx - 1U) % n_buckets(h);
506 }
507
508 static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
509 switch (h->type) {
510
511 case HASHMAP_TYPE_PLAIN:
512 case HASHMAP_TYPE_ORDERED:
513 return ((struct plain_hashmap_entry*)e)->value;
514
515 case HASHMAP_TYPE_SET:
516 return (void*) e->key;
517
518 default:
519 assert_not_reached("Unknown hashmap type");
520 }
521 }
522
523 static void base_remove_entry(HashmapBase *h, unsigned idx) {
524 unsigned left, right, prev, dib;
525 dib_raw_t raw_dib, *dibs;
526
527 dibs = dib_raw_ptr(h);
528 assert(dibs[idx] != DIB_RAW_FREE);
529
530 #if ENABLE_DEBUG_HASHMAP
531 h->debug.rem_count++;
532 h->debug.last_rem_idx = idx;
533 #endif
534
535 left = idx;
536 /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
537 for (right = next_idx(h, left); ; right = next_idx(h, right)) {
538 raw_dib = dibs[right];
539 if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
540 break;
541
542 /* The buckets are not supposed to be all occupied and with DIB > 0.
543 * That would mean we could make everyone better off by shifting them
544 * backward. This scenario is impossible. */
545 assert(left != right);
546 }
547
548 if (h->type == HASHMAP_TYPE_ORDERED) {
549 OrderedHashmap *lh = (OrderedHashmap*) h;
550 struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
551
552 if (le->iterate_next != IDX_NIL)
553 ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
554 else
555 lh->iterate_list_tail = le->iterate_previous;
556
557 if (le->iterate_previous != IDX_NIL)
558 ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
559 else
560 lh->iterate_list_head = le->iterate_next;
561 }
562
563 /* Now shift all buckets in the interval (left, right) one step backwards */
564 for (prev = left, left = next_idx(h, left); left != right;
565 prev = left, left = next_idx(h, left)) {
566 dib = bucket_calculate_dib(h, left, dibs[left]);
567 assert(dib != 0);
568 bucket_move_entry(h, NULL, left, prev);
569 bucket_set_dib(h, prev, dib - 1);
570 }
571
572 bucket_mark_free(h, prev);
573 n_entries_dec(h);
574 base_set_dirty(h);
575 }
576 #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
577
578 static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
579 struct ordered_hashmap_entry *e;
580 unsigned idx;
581
582 assert(h);
583 assert(i);
584
585 if (i->idx == IDX_NIL)
586 goto at_end;
587
588 if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
589 goto at_end;
590
591 if (i->idx == IDX_FIRST) {
592 idx = h->iterate_list_head;
593 e = ordered_bucket_at(h, idx);
594 } else {
595 idx = i->idx;
596 e = ordered_bucket_at(h, idx);
597 /*
598 * We allow removing the current entry while iterating, but removal may cause
599 * a backward shift. The next entry may thus move one bucket to the left.
600 * To detect when it happens, we remember the key pointer of the entry we were
601 * going to iterate next. If it does not match, there was a backward shift.
602 */
603 if (e->p.b.key != i->next_key) {
604 idx = prev_idx(HASHMAP_BASE(h), idx);
605 e = ordered_bucket_at(h, idx);
606 }
607 assert(e->p.b.key == i->next_key);
608 }
609
610 #if ENABLE_DEBUG_HASHMAP
611 i->prev_idx = idx;
612 #endif
613
614 if (e->iterate_next != IDX_NIL) {
615 struct ordered_hashmap_entry *n;
616 i->idx = e->iterate_next;
617 n = ordered_bucket_at(h, i->idx);
618 i->next_key = n->p.b.key;
619 } else
620 i->idx = IDX_NIL;
621
622 return idx;
623
624 at_end:
625 i->idx = IDX_NIL;
626 return IDX_NIL;
627 }
628
629 static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
630 unsigned idx;
631
632 assert(h);
633 assert(i);
634
635 if (i->idx == IDX_NIL)
636 goto at_end;
637
638 if (i->idx == IDX_FIRST) {
639 /* fast forward to the first occupied bucket */
640 if (h->has_indirect) {
641 i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
642 h->indirect.idx_lowest_entry = i->idx;
643 } else
644 i->idx = skip_free_buckets(h, 0);
645
646 if (i->idx == IDX_NIL)
647 goto at_end;
648 } else {
649 struct hashmap_base_entry *e;
650
651 assert(i->idx > 0);
652
653 e = bucket_at(h, i->idx);
654 /*
655 * We allow removing the current entry while iterating, but removal may cause
656 * a backward shift. The next entry may thus move one bucket to the left.
657 * To detect when it happens, we remember the key pointer of the entry we were
658 * going to iterate next. If it does not match, there was a backward shift.
659 */
660 if (e->key != i->next_key)
661 e = bucket_at(h, --i->idx);
662
663 assert(e->key == i->next_key);
664 }
665
666 idx = i->idx;
667 #if ENABLE_DEBUG_HASHMAP
668 i->prev_idx = idx;
669 #endif
670
671 i->idx = skip_free_buckets(h, i->idx + 1);
672 if (i->idx != IDX_NIL)
673 i->next_key = bucket_at(h, i->idx)->key;
674 else
675 i->idx = IDX_NIL;
676
677 return idx;
678
679 at_end:
680 i->idx = IDX_NIL;
681 return IDX_NIL;
682 }
683
684 static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
685 if (!h) {
686 i->idx = IDX_NIL;
687 return IDX_NIL;
688 }
689
690 #if ENABLE_DEBUG_HASHMAP
691 if (i->idx == IDX_FIRST) {
692 i->put_count = h->debug.put_count;
693 i->rem_count = h->debug.rem_count;
694 } else {
695 /* While iterating, must not add any new entries */
696 assert(i->put_count == h->debug.put_count);
697 /* ... or remove entries other than the current one */
698 assert(i->rem_count == h->debug.rem_count ||
699 (i->rem_count == h->debug.rem_count - 1 &&
700 i->prev_idx == h->debug.last_rem_idx));
701 /* Reset our removals counter */
702 i->rem_count = h->debug.rem_count;
703 }
704 #endif
705
706 return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
707 : hashmap_iterate_in_internal_order(h, i);
708 }
709
710 bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
711 struct hashmap_base_entry *e;
712 void *data;
713 unsigned idx;
714
715 idx = hashmap_iterate_entry(h, i);
716 if (idx == IDX_NIL) {
717 if (value)
718 *value = NULL;
719 if (key)
720 *key = NULL;
721
722 return false;
723 }
724
725 e = bucket_at(h, idx);
726 data = entry_value(h, e);
727 if (value)
728 *value = data;
729 if (key)
730 *key = e->key;
731
732 return true;
733 }
734
735 bool set_iterate(const Set *s, Iterator *i, void **value) {
736 return internal_hashmap_iterate(HASHMAP_BASE((Set*) s), i, value, NULL);
737 }
738
739 #define HASHMAP_FOREACH_IDX(idx, h, i) \
740 for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
741 (idx != IDX_NIL); \
742 (idx) = hashmap_iterate_entry((h), &(i)))
743
744 IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) {
745 IteratedCache *cache;
746
747 assert(h);
748 assert(!h->cached);
749
750 if (h->cached)
751 return NULL;
752
753 cache = new0(IteratedCache, 1);
754 if (!cache)
755 return NULL;
756
757 cache->hashmap = h;
758 h->cached = true;
759
760 return cache;
761 }
762
763 static void reset_direct_storage(HashmapBase *h) {
764 const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
765 void *p;
766
767 assert(!h->has_indirect);
768
769 p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
770 memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
771 }
772
773 static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
774 HashmapBase *h;
775 const struct hashmap_type_info *hi = &hashmap_type_info[type];
776 bool up;
777
778 up = mempool_enabled();
779
780 h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
781 if (!h)
782 return NULL;
783
784 h->type = type;
785 h->from_pool = up;
786 h->hash_ops = hash_ops ?: &trivial_hash_ops;
787
788 if (type == HASHMAP_TYPE_ORDERED) {
789 OrderedHashmap *lh = (OrderedHashmap*)h;
790 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
791 }
792
793 reset_direct_storage(h);
794
795 if (!shared_hash_key_initialized) {
796 random_bytes(shared_hash_key, sizeof(shared_hash_key));
797 shared_hash_key_initialized= true;
798 }
799
800 #if ENABLE_DEBUG_HASHMAP
801 h->debug.func = func;
802 h->debug.file = file;
803 h->debug.line = line;
804 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
805 LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
806 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
807 #endif
808
809 return h;
810 }
811
812 Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
813 return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
814 }
815
816 OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
817 return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
818 }
819
820 Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
821 return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
822 }
823
824 static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
825 enum HashmapType type HASHMAP_DEBUG_PARAMS) {
826 HashmapBase *q;
827
828 assert(h);
829
830 if (*h)
831 return 0;
832
833 q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
834 if (!q)
835 return -ENOMEM;
836
837 *h = q;
838 return 0;
839 }
840
841 int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
842 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
843 }
844
845 int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
846 return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
847 }
848
849 int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
850 return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
851 }
852
853 static void hashmap_free_no_clear(HashmapBase *h) {
854 assert(!h->has_indirect);
855 assert(h->n_direct_entries == 0);
856
857 #if ENABLE_DEBUG_HASHMAP
858 assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
859 LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
860 assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
861 #endif
862
863 if (h->from_pool) {
864 /* Ensure that the object didn't get migrated between threads. */
865 assert_se(is_main_thread());
866 mempool_free_tile(hashmap_type_info[h->type].mempool, h);
867 } else
868 free(h);
869 }
870
871 HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
872 if (h) {
873 internal_hashmap_clear(h, default_free_key, default_free_value);
874 hashmap_free_no_clear(h);
875 }
876
877 return NULL;
878 }
879
880 void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
881 free_func_t free_key, free_value;
882 if (!h)
883 return;
884
885 free_key = h->hash_ops->free_key ?: default_free_key;
886 free_value = h->hash_ops->free_value ?: default_free_value;
887
888 if (free_key || free_value) {
889
890 /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
891 * hash table, and only then call the destructor functions. If these destructors then try to unregister
892 * themselves from our hash table a second time, the entry is already gone. */
893
894 while (internal_hashmap_size(h) > 0) {
895 void *k = NULL;
896 void *v;
897
898 v = internal_hashmap_first_key_and_value(h, true, &k);
899
900 if (free_key)
901 free_key(k);
902
903 if (free_value)
904 free_value(v);
905 }
906 }
907
908 if (h->has_indirect) {
909 free(h->indirect.storage);
910 h->has_indirect = false;
911 }
912
913 h->n_direct_entries = 0;
914 reset_direct_storage(h);
915
916 if (h->type == HASHMAP_TYPE_ORDERED) {
917 OrderedHashmap *lh = (OrderedHashmap*) h;
918 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
919 }
920
921 base_set_dirty(h);
922 }
923
924 static int resize_buckets(HashmapBase *h, unsigned entries_add);
925
926 /*
927 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
928 * Performs Robin Hood swaps as it goes. The entry to put must be placed
929 * by the caller into swap slot IDX_PUT.
930 * If used for in-place resizing, may leave a displaced entry in swap slot
931 * IDX_PUT. Caller must rehash it next.
932 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
933 * false otherwise.
934 */
935 static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
936 struct swap_entries *swap) {
937 dib_raw_t raw_dib, *dibs;
938 unsigned dib, distance;
939
940 #if ENABLE_DEBUG_HASHMAP
941 h->debug.put_count++;
942 #endif
943
944 dibs = dib_raw_ptr(h);
945
946 for (distance = 0; ; distance++) {
947 raw_dib = dibs[idx];
948 if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
949 if (raw_dib == DIB_RAW_REHASH)
950 bucket_move_entry(h, swap, idx, IDX_TMP);
951
952 if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
953 h->indirect.idx_lowest_entry = idx;
954
955 bucket_set_dib(h, idx, distance);
956 bucket_move_entry(h, swap, IDX_PUT, idx);
957 if (raw_dib == DIB_RAW_REHASH) {
958 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
959 return true;
960 }
961
962 return false;
963 }
964
965 dib = bucket_calculate_dib(h, idx, raw_dib);
966
967 if (dib < distance) {
968 /* Found a wealthier entry. Go Robin Hood! */
969 bucket_set_dib(h, idx, distance);
970
971 /* swap the entries */
972 bucket_move_entry(h, swap, idx, IDX_TMP);
973 bucket_move_entry(h, swap, IDX_PUT, idx);
974 bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
975
976 distance = dib;
977 }
978
979 idx = next_idx(h, idx);
980 }
981 }
982
983 /*
984 * Puts an entry into a hashmap, boldly - no check whether key already exists.
985 * The caller must place the entry (only its key and value, not link indexes)
986 * in swap slot IDX_PUT.
987 * Caller must ensure: the key does not exist yet in the hashmap.
988 * that resize is not needed if !may_resize.
989 * Returns: 1 if entry was put successfully.
990 * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
991 * Cannot return -ENOMEM if !may_resize.
992 */
993 static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
994 struct swap_entries *swap, bool may_resize) {
995 struct ordered_hashmap_entry *new_entry;
996 int r;
997
998 assert(idx < n_buckets(h));
999
1000 new_entry = bucket_at_swap(swap, IDX_PUT);
1001
1002 if (may_resize) {
1003 r = resize_buckets(h, 1);
1004 if (r < 0)
1005 return r;
1006 if (r > 0)
1007 idx = bucket_hash(h, new_entry->p.b.key);
1008 }
1009 assert(n_entries(h) < n_buckets(h));
1010
1011 if (h->type == HASHMAP_TYPE_ORDERED) {
1012 OrderedHashmap *lh = (OrderedHashmap*) h;
1013
1014 new_entry->iterate_next = IDX_NIL;
1015 new_entry->iterate_previous = lh->iterate_list_tail;
1016
1017 if (lh->iterate_list_tail != IDX_NIL) {
1018 struct ordered_hashmap_entry *old_tail;
1019
1020 old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
1021 assert(old_tail->iterate_next == IDX_NIL);
1022 old_tail->iterate_next = IDX_PUT;
1023 }
1024
1025 lh->iterate_list_tail = IDX_PUT;
1026 if (lh->iterate_list_head == IDX_NIL)
1027 lh->iterate_list_head = IDX_PUT;
1028 }
1029
1030 assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
1031
1032 n_entries_inc(h);
1033 #if ENABLE_DEBUG_HASHMAP
1034 h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
1035 #endif
1036
1037 base_set_dirty(h);
1038
1039 return 1;
1040 }
1041 #define hashmap_put_boldly(h, idx, swap, may_resize) \
1042 hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1043
1044 /*
1045 * Returns 0 if resize is not needed.
1046 * 1 if successfully resized.
1047 * -ENOMEM on allocation failure.
1048 */
1049 static int resize_buckets(HashmapBase *h, unsigned entries_add) {
1050 struct swap_entries swap;
1051 void *new_storage;
1052 dib_raw_t *old_dibs, *new_dibs;
1053 const struct hashmap_type_info *hi;
1054 unsigned idx, optimal_idx;
1055 unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
1056 uint8_t new_shift;
1057 bool rehash_next;
1058
1059 assert(h);
1060
1061 hi = &hashmap_type_info[h->type];
1062 new_n_entries = n_entries(h) + entries_add;
1063
1064 /* overflow? */
1065 if (_unlikely_(new_n_entries < entries_add))
1066 return -ENOMEM;
1067
1068 /* For direct storage we allow 100% load, because it's tiny. */
1069 if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
1070 return 0;
1071
1072 /*
1073 * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1074 * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1075 */
1076 new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
1077 /* overflow? */
1078 if (_unlikely_(new_n_buckets < new_n_entries))
1079 return -ENOMEM;
1080
1081 if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
1082 return -ENOMEM;
1083
1084 old_n_buckets = n_buckets(h);
1085
1086 if (_likely_(new_n_buckets <= old_n_buckets))
1087 return 0;
1088
1089 new_shift = log2u_round_up(MAX(
1090 new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
1091 2 * sizeof(struct direct_storage)));
1092
1093 /* Realloc storage (buckets and DIB array). */
1094 new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
1095 1U << new_shift);
1096 if (!new_storage)
1097 return -ENOMEM;
1098
1099 /* Must upgrade direct to indirect storage. */
1100 if (!h->has_indirect) {
1101 memcpy(new_storage, h->direct.storage,
1102 old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
1103 h->indirect.n_entries = h->n_direct_entries;
1104 h->indirect.idx_lowest_entry = 0;
1105 h->n_direct_entries = 0;
1106 }
1107
1108 /* Get a new hash key. If we've just upgraded to indirect storage,
1109 * allow reusing a previously generated key. It's still a different key
1110 * from the shared one that we used for direct storage. */
1111 get_hash_key(h->indirect.hash_key, !h->has_indirect);
1112
1113 h->has_indirect = true;
1114 h->indirect.storage = new_storage;
1115 h->indirect.n_buckets = (1U << new_shift) /
1116 (hi->entry_size + sizeof(dib_raw_t));
1117
1118 old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
1119 new_dibs = dib_raw_ptr(h);
1120
1121 /*
1122 * Move the DIB array to the new place, replacing valid DIB values with
1123 * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1124 * Note: Overlap is not possible, because we have at least doubled the
1125 * number of buckets and dib_raw_t is smaller than any entry type.
1126 */
1127 for (idx = 0; idx < old_n_buckets; idx++) {
1128 assert(old_dibs[idx] != DIB_RAW_REHASH);
1129 new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
1130 : DIB_RAW_REHASH;
1131 }
1132
1133 /* Zero the area of newly added entries (including the old DIB area) */
1134 memzero(bucket_at(h, old_n_buckets),
1135 (n_buckets(h) - old_n_buckets) * hi->entry_size);
1136
1137 /* The upper half of the new DIB array needs initialization */
1138 memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
1139 (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
1140
1141 /* Rehash entries that need it */
1142 n_rehashed = 0;
1143 for (idx = 0; idx < old_n_buckets; idx++) {
1144 if (new_dibs[idx] != DIB_RAW_REHASH)
1145 continue;
1146
1147 optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
1148
1149 /*
1150 * Not much to do if by luck the entry hashes to its current
1151 * location. Just set its DIB.
1152 */
1153 if (optimal_idx == idx) {
1154 new_dibs[idx] = 0;
1155 n_rehashed++;
1156 continue;
1157 }
1158
1159 new_dibs[idx] = DIB_RAW_FREE;
1160 bucket_move_entry(h, &swap, idx, IDX_PUT);
1161 /* bucket_move_entry does not clear the source */
1162 memzero(bucket_at(h, idx), hi->entry_size);
1163
1164 do {
1165 /*
1166 * Find the new bucket for the current entry. This may make
1167 * another entry homeless and load it into IDX_PUT.
1168 */
1169 rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
1170 n_rehashed++;
1171
1172 /* Did the current entry displace another one? */
1173 if (rehash_next)
1174 optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
1175 } while (rehash_next);
1176 }
1177
1178 assert(n_rehashed == n_entries(h));
1179
1180 return 1;
1181 }
1182
1183 /*
1184 * Finds an entry with a matching key
1185 * Returns: index of the found entry, or IDX_NIL if not found.
1186 */
1187 static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
1188 struct hashmap_base_entry *e;
1189 unsigned dib, distance;
1190 dib_raw_t *dibs = dib_raw_ptr(h);
1191
1192 assert(idx < n_buckets(h));
1193
1194 for (distance = 0; ; distance++) {
1195 if (dibs[idx] == DIB_RAW_FREE)
1196 return IDX_NIL;
1197
1198 dib = bucket_calculate_dib(h, idx, dibs[idx]);
1199
1200 if (dib < distance)
1201 return IDX_NIL;
1202 if (dib == distance) {
1203 e = bucket_at(h, idx);
1204 if (h->hash_ops->compare(e->key, key) == 0)
1205 return idx;
1206 }
1207
1208 idx = next_idx(h, idx);
1209 }
1210 }
1211 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
1212
1213 int hashmap_put(Hashmap *h, const void *key, void *value) {
1214 struct swap_entries swap;
1215 struct plain_hashmap_entry *e;
1216 unsigned hash, idx;
1217
1218 assert(h);
1219
1220 hash = bucket_hash(h, key);
1221 idx = bucket_scan(h, hash, key);
1222 if (idx != IDX_NIL) {
1223 e = plain_bucket_at(h, idx);
1224 if (e->value == value)
1225 return 0;
1226 return -EEXIST;
1227 }
1228
1229 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1230 e->b.key = key;
1231 e->value = value;
1232 return hashmap_put_boldly(h, hash, &swap, true);
1233 }
1234
1235 int set_put(Set *s, const void *key) {
1236 struct swap_entries swap;
1237 struct hashmap_base_entry *e;
1238 unsigned hash, idx;
1239
1240 assert(s);
1241
1242 hash = bucket_hash(s, key);
1243 idx = bucket_scan(s, hash, key);
1244 if (idx != IDX_NIL)
1245 return 0;
1246
1247 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1248 e->key = key;
1249 return hashmap_put_boldly(s, hash, &swap, true);
1250 }
1251
1252 int hashmap_replace(Hashmap *h, const void *key, void *value) {
1253 struct swap_entries swap;
1254 struct plain_hashmap_entry *e;
1255 unsigned hash, idx;
1256
1257 assert(h);
1258
1259 hash = bucket_hash(h, key);
1260 idx = bucket_scan(h, hash, key);
1261 if (idx != IDX_NIL) {
1262 e = plain_bucket_at(h, idx);
1263 #if ENABLE_DEBUG_HASHMAP
1264 /* Although the key is equal, the key pointer may have changed,
1265 * and this would break our assumption for iterating. So count
1266 * this operation as incompatible with iteration. */
1267 if (e->b.key != key) {
1268 h->b.debug.put_count++;
1269 h->b.debug.rem_count++;
1270 h->b.debug.last_rem_idx = idx;
1271 }
1272 #endif
1273 e->b.key = key;
1274 e->value = value;
1275 hashmap_set_dirty(h);
1276
1277 return 0;
1278 }
1279
1280 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1281 e->b.key = key;
1282 e->value = value;
1283 return hashmap_put_boldly(h, hash, &swap, true);
1284 }
1285
1286 int hashmap_update(Hashmap *h, const void *key, void *value) {
1287 struct plain_hashmap_entry *e;
1288 unsigned hash, idx;
1289
1290 assert(h);
1291
1292 hash = bucket_hash(h, key);
1293 idx = bucket_scan(h, hash, key);
1294 if (idx == IDX_NIL)
1295 return -ENOENT;
1296
1297 e = plain_bucket_at(h, idx);
1298 e->value = value;
1299 hashmap_set_dirty(h);
1300
1301 return 0;
1302 }
1303
1304 void *internal_hashmap_get(HashmapBase *h, const void *key) {
1305 struct hashmap_base_entry *e;
1306 unsigned hash, idx;
1307
1308 if (!h)
1309 return NULL;
1310
1311 hash = bucket_hash(h, key);
1312 idx = bucket_scan(h, hash, key);
1313 if (idx == IDX_NIL)
1314 return NULL;
1315
1316 e = bucket_at(h, idx);
1317 return entry_value(h, e);
1318 }
1319
1320 void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
1321 struct plain_hashmap_entry *e;
1322 unsigned hash, idx;
1323
1324 if (!h)
1325 return NULL;
1326
1327 hash = bucket_hash(h, key);
1328 idx = bucket_scan(h, hash, key);
1329 if (idx == IDX_NIL)
1330 return NULL;
1331
1332 e = plain_bucket_at(h, idx);
1333 if (key2)
1334 *key2 = (void*) e->b.key;
1335
1336 return e->value;
1337 }
1338
1339 bool internal_hashmap_contains(HashmapBase *h, const void *key) {
1340 unsigned hash;
1341
1342 if (!h)
1343 return false;
1344
1345 hash = bucket_hash(h, key);
1346 return bucket_scan(h, hash, key) != IDX_NIL;
1347 }
1348
1349 void *internal_hashmap_remove(HashmapBase *h, const void *key) {
1350 struct hashmap_base_entry *e;
1351 unsigned hash, idx;
1352 void *data;
1353
1354 if (!h)
1355 return NULL;
1356
1357 hash = bucket_hash(h, key);
1358 idx = bucket_scan(h, hash, key);
1359 if (idx == IDX_NIL)
1360 return NULL;
1361
1362 e = bucket_at(h, idx);
1363 data = entry_value(h, e);
1364 remove_entry(h, idx);
1365
1366 return data;
1367 }
1368
1369 void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
1370 struct plain_hashmap_entry *e;
1371 unsigned hash, idx;
1372 void *data;
1373
1374 if (!h) {
1375 if (rkey)
1376 *rkey = NULL;
1377 return NULL;
1378 }
1379
1380 hash = bucket_hash(h, key);
1381 idx = bucket_scan(h, hash, key);
1382 if (idx == IDX_NIL) {
1383 if (rkey)
1384 *rkey = NULL;
1385 return NULL;
1386 }
1387
1388 e = plain_bucket_at(h, idx);
1389 data = e->value;
1390 if (rkey)
1391 *rkey = (void*) e->b.key;
1392
1393 remove_entry(h, idx);
1394
1395 return data;
1396 }
1397
1398 int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1399 struct swap_entries swap;
1400 struct plain_hashmap_entry *e;
1401 unsigned old_hash, new_hash, idx;
1402
1403 if (!h)
1404 return -ENOENT;
1405
1406 old_hash = bucket_hash(h, old_key);
1407 idx = bucket_scan(h, old_hash, old_key);
1408 if (idx == IDX_NIL)
1409 return -ENOENT;
1410
1411 new_hash = bucket_hash(h, new_key);
1412 if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
1413 return -EEXIST;
1414
1415 remove_entry(h, idx);
1416
1417 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1418 e->b.key = new_key;
1419 e->value = value;
1420 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1421
1422 return 0;
1423 }
1424
1425 int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
1426 struct swap_entries swap;
1427 struct hashmap_base_entry *e;
1428 unsigned old_hash, new_hash, idx;
1429
1430 if (!s)
1431 return -ENOENT;
1432
1433 old_hash = bucket_hash(s, old_key);
1434 idx = bucket_scan(s, old_hash, old_key);
1435 if (idx == IDX_NIL)
1436 return -ENOENT;
1437
1438 new_hash = bucket_hash(s, new_key);
1439 if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
1440 return -EEXIST;
1441
1442 remove_entry(s, idx);
1443
1444 e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1445 e->key = new_key;
1446 assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
1447
1448 return 0;
1449 }
1450
1451 int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1452 struct swap_entries swap;
1453 struct plain_hashmap_entry *e;
1454 unsigned old_hash, new_hash, idx_old, idx_new;
1455
1456 if (!h)
1457 return -ENOENT;
1458
1459 old_hash = bucket_hash(h, old_key);
1460 idx_old = bucket_scan(h, old_hash, old_key);
1461 if (idx_old == IDX_NIL)
1462 return -ENOENT;
1463
1464 old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
1465
1466 new_hash = bucket_hash(h, new_key);
1467 idx_new = bucket_scan(h, new_hash, new_key);
1468 if (idx_new != IDX_NIL)
1469 if (idx_old != idx_new) {
1470 remove_entry(h, idx_new);
1471 /* Compensate for a possible backward shift. */
1472 if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
1473 idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
1474 assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
1475 }
1476
1477 remove_entry(h, idx_old);
1478
1479 e = &bucket_at_swap(&swap, IDX_PUT)->p;
1480 e->b.key = new_key;
1481 e->value = value;
1482 assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1483
1484 return 0;
1485 }
1486
1487 void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
1488 struct hashmap_base_entry *e;
1489 unsigned hash, idx;
1490
1491 if (!h)
1492 return NULL;
1493
1494 hash = bucket_hash(h, key);
1495 idx = bucket_scan(h, hash, key);
1496 if (idx == IDX_NIL)
1497 return NULL;
1498
1499 e = bucket_at(h, idx);
1500 if (entry_value(h, e) != value)
1501 return NULL;
1502
1503 remove_entry(h, idx);
1504
1505 return value;
1506 }
1507
1508 static unsigned find_first_entry(HashmapBase *h) {
1509 Iterator i = ITERATOR_FIRST;
1510
1511 if (!h || !n_entries(h))
1512 return IDX_NIL;
1513
1514 return hashmap_iterate_entry(h, &i);
1515 }
1516
1517 void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
1518 struct hashmap_base_entry *e;
1519 void *key, *data;
1520 unsigned idx;
1521
1522 idx = find_first_entry(h);
1523 if (idx == IDX_NIL) {
1524 if (ret_key)
1525 *ret_key = NULL;
1526 return NULL;
1527 }
1528
1529 e = bucket_at(h, idx);
1530 key = (void*) e->key;
1531 data = entry_value(h, e);
1532
1533 if (remove)
1534 remove_entry(h, idx);
1535
1536 if (ret_key)
1537 *ret_key = key;
1538
1539 return data;
1540 }
1541
1542 unsigned internal_hashmap_size(HashmapBase *h) {
1543 if (!h)
1544 return 0;
1545
1546 return n_entries(h);
1547 }
1548
1549 unsigned internal_hashmap_buckets(HashmapBase *h) {
1550 if (!h)
1551 return 0;
1552
1553 return n_buckets(h);
1554 }
1555
1556 int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
1557 Iterator i;
1558 unsigned idx;
1559
1560 assert(h);
1561
1562 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1563 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
1564 int r;
1565
1566 r = hashmap_put(h, pe->b.key, pe->value);
1567 if (r < 0 && r != -EEXIST)
1568 return r;
1569 }
1570
1571 return 0;
1572 }
1573
1574 int set_merge(Set *s, Set *other) {
1575 Iterator i;
1576 unsigned idx;
1577
1578 assert(s);
1579
1580 HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1581 struct set_entry *se = set_bucket_at(other, idx);
1582 int r;
1583
1584 r = set_put(s, se->b.key);
1585 if (r < 0)
1586 return r;
1587 }
1588
1589 return 0;
1590 }
1591
1592 int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
1593 int r;
1594
1595 assert(h);
1596
1597 r = resize_buckets(h, entries_add);
1598 if (r < 0)
1599 return r;
1600
1601 return 0;
1602 }
1603
1604 /*
1605 * The same as hashmap_merge(), but every new item from other is moved to h.
1606 * Keys already in h are skipped and stay in other.
1607 * Returns: 0 on success.
1608 * -ENOMEM on alloc failure, in which case no move has been done.
1609 */
1610 int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
1611 struct swap_entries swap;
1612 struct hashmap_base_entry *e, *n;
1613 Iterator i;
1614 unsigned idx;
1615 int r;
1616
1617 assert(h);
1618
1619 if (!other)
1620 return 0;
1621
1622 assert(other->type == h->type);
1623
1624 /*
1625 * This reserves buckets for the worst case, where none of other's
1626 * entries are yet present in h. This is preferable to risking
1627 * an allocation failure in the middle of the moving and having to
1628 * rollback or return a partial result.
1629 */
1630 r = resize_buckets(h, n_entries(other));
1631 if (r < 0)
1632 return r;
1633
1634 HASHMAP_FOREACH_IDX(idx, other, i) {
1635 unsigned h_hash;
1636
1637 e = bucket_at(other, idx);
1638 h_hash = bucket_hash(h, e->key);
1639 if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
1640 continue;
1641
1642 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1643 n->key = e->key;
1644 if (h->type != HASHMAP_TYPE_SET)
1645 ((struct plain_hashmap_entry*) n)->value =
1646 ((struct plain_hashmap_entry*) e)->value;
1647 assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
1648
1649 remove_entry(other, idx);
1650 }
1651
1652 return 0;
1653 }
1654
1655 int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
1656 struct swap_entries swap;
1657 unsigned h_hash, other_hash, idx;
1658 struct hashmap_base_entry *e, *n;
1659 int r;
1660
1661 assert(h);
1662
1663 h_hash = bucket_hash(h, key);
1664 if (bucket_scan(h, h_hash, key) != IDX_NIL)
1665 return -EEXIST;
1666
1667 if (!other)
1668 return -ENOENT;
1669
1670 assert(other->type == h->type);
1671
1672 other_hash = bucket_hash(other, key);
1673 idx = bucket_scan(other, other_hash, key);
1674 if (idx == IDX_NIL)
1675 return -ENOENT;
1676
1677 e = bucket_at(other, idx);
1678
1679 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1680 n->key = e->key;
1681 if (h->type != HASHMAP_TYPE_SET)
1682 ((struct plain_hashmap_entry*) n)->value =
1683 ((struct plain_hashmap_entry*) e)->value;
1684 r = hashmap_put_boldly(h, h_hash, &swap, true);
1685 if (r < 0)
1686 return r;
1687
1688 remove_entry(other, idx);
1689 return 0;
1690 }
1691
1692 HashmapBase *internal_hashmap_copy(HashmapBase *h) {
1693 HashmapBase *copy;
1694 int r;
1695
1696 assert(h);
1697
1698 copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS);
1699 if (!copy)
1700 return NULL;
1701
1702 switch (h->type) {
1703 case HASHMAP_TYPE_PLAIN:
1704 case HASHMAP_TYPE_ORDERED:
1705 r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
1706 break;
1707 case HASHMAP_TYPE_SET:
1708 r = set_merge((Set*)copy, (Set*)h);
1709 break;
1710 default:
1711 assert_not_reached("Unknown hashmap type");
1712 }
1713
1714 if (r < 0) {
1715 internal_hashmap_free(copy, false, false);
1716 return NULL;
1717 }
1718
1719 return copy;
1720 }
1721
1722 char **internal_hashmap_get_strv(HashmapBase *h) {
1723 char **sv;
1724 Iterator i;
1725 unsigned idx, n;
1726
1727 sv = new(char*, n_entries(h)+1);
1728 if (!sv)
1729 return NULL;
1730
1731 n = 0;
1732 HASHMAP_FOREACH_IDX(idx, h, i)
1733 sv[n++] = entry_value(h, bucket_at(h, idx));
1734 sv[n] = NULL;
1735
1736 return sv;
1737 }
1738
1739 void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
1740 struct ordered_hashmap_entry *e;
1741 unsigned hash, idx;
1742
1743 if (!h)
1744 return NULL;
1745
1746 hash = bucket_hash(h, key);
1747 idx = bucket_scan(h, hash, key);
1748 if (idx == IDX_NIL)
1749 return NULL;
1750
1751 e = ordered_bucket_at(h, idx);
1752 if (e->iterate_next == IDX_NIL)
1753 return NULL;
1754 return ordered_bucket_at(h, e->iterate_next)->p.value;
1755 }
1756
1757 int set_consume(Set *s, void *value) {
1758 int r;
1759
1760 assert(s);
1761 assert(value);
1762
1763 r = set_put(s, value);
1764 if (r <= 0)
1765 free(value);
1766
1767 return r;
1768 }
1769
1770 int hashmap_put_strdup(Hashmap **h, const char *k, const char *v) {
1771 int r;
1772
1773 r = hashmap_ensure_allocated(h, &string_hash_ops_free_free);
1774 if (r < 0)
1775 return r;
1776
1777 _cleanup_free_ char *kdup = NULL, *vdup = NULL;
1778
1779 kdup = strdup(k);
1780 if (!kdup)
1781 return -ENOMEM;
1782
1783 if (v) {
1784 vdup = strdup(v);
1785 if (!vdup)
1786 return -ENOMEM;
1787 }
1788
1789 r = hashmap_put(*h, kdup, vdup);
1790 if (r < 0) {
1791 if (r == -EEXIST && streq_ptr(v, hashmap_get(*h, kdup)))
1792 return 0;
1793 return r;
1794 }
1795
1796 /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */
1797 assert(vdup == NULL || r > 0);
1798 if (r > 0)
1799 kdup = vdup = NULL;
1800
1801 return r;
1802 }
1803
1804 int set_put_strdup(Set **s, const char *p) {
1805 char *c;
1806 int r;
1807
1808 assert(s);
1809 assert(p);
1810
1811 r = set_ensure_allocated(s, &string_hash_ops_free);
1812 if (r < 0)
1813 return r;
1814
1815 if (set_contains(*s, (char*) p))
1816 return 0;
1817
1818 c = strdup(p);
1819 if (!c)
1820 return -ENOMEM;
1821
1822 return set_consume(*s, c);
1823 }
1824
1825 int set_put_strdupv(Set **s, char **l) {
1826 int n = 0, r;
1827 char **i;
1828
1829 assert(s);
1830
1831 STRV_FOREACH(i, l) {
1832 r = set_put_strdup(s, *i);
1833 if (r < 0)
1834 return r;
1835
1836 n += r;
1837 }
1838
1839 return n;
1840 }
1841
1842 int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
1843 const char *p = v;
1844 int r;
1845
1846 assert(s);
1847 assert(v);
1848
1849 for (;;) {
1850 char *word;
1851
1852 r = extract_first_word(&p, &word, separators, flags);
1853 if (r <= 0)
1854 return r;
1855
1856 r = set_consume(s, word);
1857 if (r < 0)
1858 return r;
1859 }
1860 }
1861
1862 /* expand the cachemem if needed, return true if newly (re)activated. */
1863 static int cachemem_maintain(CacheMem *mem, unsigned size) {
1864 assert(mem);
1865
1866 if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) {
1867 if (size > 0)
1868 return -ENOMEM;
1869 }
1870
1871 if (!mem->active) {
1872 mem->active = true;
1873 return true;
1874 }
1875
1876 return false;
1877 }
1878
1879 int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
1880 bool sync_keys = false, sync_values = false;
1881 unsigned size;
1882 int r;
1883
1884 assert(cache);
1885 assert(cache->hashmap);
1886
1887 size = n_entries(cache->hashmap);
1888
1889 if (res_keys) {
1890 r = cachemem_maintain(&cache->keys, size);
1891 if (r < 0)
1892 return r;
1893
1894 sync_keys = r;
1895 } else
1896 cache->keys.active = false;
1897
1898 if (res_values) {
1899 r = cachemem_maintain(&cache->values, size);
1900 if (r < 0)
1901 return r;
1902
1903 sync_values = r;
1904 } else
1905 cache->values.active = false;
1906
1907 if (cache->hashmap->dirty) {
1908 if (cache->keys.active)
1909 sync_keys = true;
1910 if (cache->values.active)
1911 sync_values = true;
1912
1913 cache->hashmap->dirty = false;
1914 }
1915
1916 if (sync_keys || sync_values) {
1917 unsigned i, idx;
1918 Iterator iter;
1919
1920 i = 0;
1921 HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
1922 struct hashmap_base_entry *e;
1923
1924 e = bucket_at(cache->hashmap, idx);
1925
1926 if (sync_keys)
1927 cache->keys.ptr[i] = e->key;
1928 if (sync_values)
1929 cache->values.ptr[i] = entry_value(cache->hashmap, e);
1930 i++;
1931 }
1932 }
1933
1934 if (res_keys)
1935 *res_keys = cache->keys.ptr;
1936 if (res_values)
1937 *res_values = cache->values.ptr;
1938 if (res_n_entries)
1939 *res_n_entries = size;
1940
1941 return 0;
1942 }
1943
1944 IteratedCache *iterated_cache_free(IteratedCache *cache) {
1945 if (cache) {
1946 free(cache->keys.ptr);
1947 free(cache->values.ptr);
1948 }
1949
1950 return mfree(cache);
1951 }