src/basic/hashmap.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6 #include <string.h>
   7
   8 #include "alloc-util.h"
   9 #include "fileio.h"
  10 #include "hashmap.h"
  11 #include "macro.h"
  12 #include "mempool.h"
  13 #include "process-util.h"
  14 #include "random-util.h"
  15 #include "set.h"
  16 #include "siphash24.h"
  17 #include "string-util.h"
  18 #include "strv.h"
  19 #include "util.h"
  20
  21 #if ENABLE_DEBUG_HASHMAP
  22 #include <pthread.h>
  23 #include "list.h"
  24 #endif
  25
  26 /*
  27  * Implementation of hashmaps.
  28  * Addressing: open
  29  *   - uses less RAM compared to closed addressing (chaining), because
  30  *     our entries are small (especially in Sets, which tend to contain
  31  *     the majority of entries in systemd).
  32  * Collision resolution: Robin Hood
  33  *   - tends to equalize displacement of entries from their optimal buckets.
  34  * Probe sequence: linear
  35  *   - though theoretically worse than random probing/uniform hashing/double
  36  *     hashing, it is good for cache locality.
  37  *
  38  * References:
  39  * Celis, P. 1986. Robin Hood Hashing.
  40  * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada.
  41  * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
  42  * - The results are derived for random probing. Suggests deletion with
  43  *   tombstones and two mean-centered search methods. None of that works
  44  *   well for linear probing.
  45  *
  46  * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
  47  * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
  48  * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
  49  * http://www.math.uu.se/~svante/papers/sj157.pdf
  50  * - Applies to Robin Hood with linear probing. Contains remarks on
  51  *   the unsuitability of mean-centered search with linear probing.
  52  *
  53  * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
  54  * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
  55  * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
  56  * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
  57  *   in a successful search), and Janson writes about displacement. C = d + 1.
  58  *
  59  * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
  60  * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
  61  * - Explanation of backward shift deletion with pictures.
  62  *
  63  * Khuong, P. 2013. The Other Robin Hood Hashing.
  64  * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
  65  * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
  66  */
  67
  68 /*
  69  * XXX Ideas for improvement:
  70  * For unordered hashmaps, randomize iteration order, similarly to Perl:
  71  * http://blog.booking.com/hardening-perls-hash-function.html
  72  */
  73
  74 /* INV_KEEP_FREE = 1 / (1 - max_load_factor)
  75  * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
  76 #define INV_KEEP_FREE            5U
  77
  78 /* Fields common to entries of all hashmap/set types */
  79 struct hashmap_base_entry {
  80         const void *key;
  81 };
  82
  83 /* Entry types for specific hashmap/set types
  84  * hashmap_base_entry must be at the beginning of each entry struct. */
  85
  86 struct plain_hashmap_entry {
  87         struct hashmap_base_entry b;
  88         void *value;
  89 };
  90
  91 struct ordered_hashmap_entry {
  92         struct plain_hashmap_entry p;
  93         unsigned iterate_next, iterate_previous;
  94 };
  95
  96 struct set_entry {
  97         struct hashmap_base_entry b;
  98 };
  99
 100 /* In several functions it is advantageous to have the hash table extended
 101  * virtually by a couple of additional buckets. We reserve special index values
 102  * for these "swap" buckets. */
 103 #define _IDX_SWAP_BEGIN     (UINT_MAX - 3)
 104 #define IDX_PUT             (_IDX_SWAP_BEGIN + 0)
 105 #define IDX_TMP             (_IDX_SWAP_BEGIN + 1)
 106 #define _IDX_SWAP_END       (_IDX_SWAP_BEGIN + 2)
 107
 108 #define IDX_FIRST           (UINT_MAX - 1) /* special index for freshly initialized iterators */
 109 #define IDX_NIL             UINT_MAX       /* special index value meaning "none" or "end" */
 110
 111 assert_cc(IDX_FIRST == _IDX_SWAP_END);
 112 assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
 113
 114 /* Storage space for the "swap" buckets.
 115  * All entry types can fit into a ordered_hashmap_entry. */
 116 struct swap_entries {
 117         struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
 118 };
 119
 120 /* Distance from Initial Bucket */
 121 typedef uint8_t dib_raw_t;
 122 #define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU)   /* indicates DIB value is greater than representable */
 123 #define DIB_RAW_REHASH   ((dib_raw_t)0xfeU)   /* entry yet to be rehashed during in-place resize */
 124 #define DIB_RAW_FREE     ((dib_raw_t)0xffU)   /* a free bucket */
 125 #define DIB_RAW_INIT     ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
 126
 127 #define DIB_FREE UINT_MAX
 128
 129 #if ENABLE_DEBUG_HASHMAP
 130 struct hashmap_debug_info {
 131         LIST_FIELDS(struct hashmap_debug_info, debug_list);
 132         unsigned max_entries;  /* high watermark of n_entries */
 133
 134         /* who allocated this hashmap */
 135         int line;
 136         const char *file;
 137         const char *func;
 138
 139         /* fields to detect modification while iterating */
 140         unsigned put_count;    /* counts puts into the hashmap */
 141         unsigned rem_count;    /* counts removals from hashmap */
 142         unsigned last_rem_idx; /* remembers last removal index */
 143 };
 144
 145 /* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
 146 static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
 147 static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 148
 149 #define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
 150
 151 #else /* !ENABLE_DEBUG_HASHMAP */
 152 #define HASHMAP_DEBUG_FIELDS
 153 #endif /* ENABLE_DEBUG_HASHMAP */
 154
 155 enum HashmapType {
 156         HASHMAP_TYPE_PLAIN,
 157         HASHMAP_TYPE_ORDERED,
 158         HASHMAP_TYPE_SET,
 159         _HASHMAP_TYPE_MAX
 160 };
 161
  162 struct _packed_ indirect_storage {
              /* Bookkeeping for a table whose buckets live outside of the hashmap
               * head (HashmapBase.has_indirect set). The struct is packed and its
               * size also determines the size of struct direct_storage, so do not
               * add, remove or reorder members without re-checking both. */
  163         void *storage;                     /* where buckets and DIBs are stored */
  164         uint8_t  hash_key[HASH_KEY_SIZE];  /* hash key; changes during resize */
  165
  166         unsigned n_entries;                /* number of stored entries */
  167         unsigned n_buckets;                /* number of buckets */
  168
  169         unsigned idx_lowest_entry;         /* Index below which all buckets are free.
  170                                               Makes "while(hashmap_steal_first())" loops
  171                                               O(n) instead of O(n^2) for unordered hashmaps. */
  172         uint8_t  _pad[3];                  /* padding for the whole HashmapBase */
  173         /* The bitfields in HashmapBase complete the alignment of the whole thing. */
  174 };
 175
 176 struct direct_storage {
 177         /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
 178          * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
 179          *              or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
 180         uint8_t storage[sizeof(struct indirect_storage)];
 181 };
 182
 183 #define DIRECT_BUCKETS(entry_t) \
 184         (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
 185
 186 /* We should be able to store at least one entry directly. */
 187 assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
 188
 189 /* We have 3 bits for n_direct_entries. */
 190 assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
 191
 192 /* Hashmaps with directly stored entries all use this shared hash key.
 193  * It's no big deal if the key is guessed, because there can be only
 194  * a handful of directly stored entries in a hashmap. When a hashmap
 195  * outgrows direct storage, it gets its own key for indirect storage. */
 196 static uint8_t shared_hash_key[HASH_KEY_SIZE];
 197 static bool shared_hash_key_initialized;
 198
  199 /* Fields that all hashmap/set types must have */
  200 struct HashmapBase {
  201         const struct hash_ops *hash_ops;  /* hash and compare ops to use */
  202
              /* Both storage variants share the same bytes; has_indirect below
               * tells which member is currently valid. */
  203         union _packed_ {
  204                 struct indirect_storage indirect; /* if  has_indirect */
  205                 struct direct_storage direct;     /* if !has_indirect */
  206         };
  207
              /* type is also the index into hashmap_type_info[]. */
  208         enum HashmapType type:2;     /* HASHMAP_TYPE_* */
  209         bool has_indirect:1;         /* whether indirect storage is used */
  210         unsigned n_direct_entries:3; /* Number of entries in direct storage.
  211                                       * Only valid if !has_indirect. */
  212         bool from_pool:1;            /* whether was allocated from mempool */
  213         bool dirty:1;                /* whether dirtied since last iterated_cache_get() */
  214         bool cached:1;               /* whether this hashmap is being cached */
  215         HASHMAP_DEBUG_FIELDS         /* optional hashmap_debug_info */
  216 };
 217
 218 /* Specific hash types
 219  * HashmapBase must be at the beginning of each hashmap struct. */
 220
 221 struct Hashmap {
 222         struct HashmapBase b;
 223 };
 224
 225 struct OrderedHashmap {
 226         struct HashmapBase b;
 227         unsigned iterate_list_head, iterate_list_tail;
 228 };
 229
 230 struct Set {
 231         struct HashmapBase b;
 232 };
 233
 234 typedef struct CacheMem {
 235         const void **ptr;
 236         size_t n_populated, n_allocated;
 237         bool active:1;
 238 } CacheMem;
 239
 240 struct IteratedCache {
 241         HashmapBase *hashmap;
 242         CacheMem keys, values;
 243 };
 244
 245 DEFINE_MEMPOOL(hashmap_pool,         Hashmap,        8);
 246 DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
 247 /* No need for a separate Set pool */
 248 assert_cc(sizeof(Hashmap) == sizeof(Set));
 249
 250 struct hashmap_type_info {
 251         size_t head_size;
 252         size_t entry_size;
 253         struct mempool *mempool;
 254         unsigned n_direct_buckets;
 255 };
 256
 257 static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
 258         [HASHMAP_TYPE_PLAIN] = {
 259                 .head_size        = sizeof(Hashmap),
 260                 .entry_size       = sizeof(struct plain_hashmap_entry),
 261                 .mempool          = &hashmap_pool,
 262                 .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
 263         },
 264         [HASHMAP_TYPE_ORDERED] = {
 265                 .head_size        = sizeof(OrderedHashmap),
 266                 .entry_size       = sizeof(struct ordered_hashmap_entry),
 267                 .mempool          = &ordered_hashmap_pool,
 268                 .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
 269         },
 270         [HASHMAP_TYPE_SET] = {
 271                 .head_size        = sizeof(Set),
 272                 .entry_size       = sizeof(struct set_entry),
 273                 .mempool          = &hashmap_pool,
 274                 .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
 275         },
 276 };
 277
 278 #if VALGRIND
 279 _destructor_ static void cleanup_pools(void) {
 280         _cleanup_free_ char *t = NULL;
 281         int r;
 282
 283         /* Be nice to valgrind */
 284
 285         /* The pool is only allocated by the main thread, but the memory can
 286          * be passed to other threads. Let's clean up if we are the main thread
 287          * and no other threads are live. */
 288         if (!is_main_thread())
 289                 return;
 290
 291         r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t);
 292         if (r < 0 || !streq(t, "1"))
 293                 return;
 294
 295         mempool_drop(&hashmap_pool);
 296         mempool_drop(&ordered_hashmap_pool);
 297 }
 298 #endif
 299
 300 static unsigned n_buckets(HashmapBase *h) {
 301         return h->has_indirect ? h->indirect.n_buckets
 302                                : hashmap_type_info[h->type].n_direct_buckets;
 303 }
 304
 305 static unsigned n_entries(HashmapBase *h) {
 306         return h->has_indirect ? h->indirect.n_entries
 307                                : h->n_direct_entries;
 308 }
 309
 310 static void n_entries_inc(HashmapBase *h) {
 311         if (h->has_indirect)
 312                 h->indirect.n_entries++;
 313         else
 314                 h->n_direct_entries++;
 315 }
 316
 317 static void n_entries_dec(HashmapBase *h) {
 318         if (h->has_indirect)
 319                 h->indirect.n_entries--;
 320         else
 321                 h->n_direct_entries--;
 322 }
 323
 324 static void *storage_ptr(HashmapBase *h) {
 325         return h->has_indirect ? h->indirect.storage
 326                                : h->direct.storage;
 327 }
 328
 329 static uint8_t *hash_key(HashmapBase *h) {
 330         return h->has_indirect ? h->indirect.hash_key
 331                                : shared_hash_key;
 332 }
 333
 334 static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
 335         struct siphash state;
 336         uint64_t hash;
 337
 338         siphash24_init(&state, hash_key(h));
 339
 340         h->hash_ops->hash(p, &state);
 341
 342         hash = siphash24_finalize(&state);
 343
 344         return (unsigned) (hash % n_buckets(h));
 345 }
 346 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
 347
 348 static void base_set_dirty(HashmapBase *h) {
 349         h->dirty = true;
 350 }
 351 #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
 352
 353 static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
 354         static uint8_t current[HASH_KEY_SIZE];
 355         static bool current_initialized = false;
 356
 357         /* Returns a hash function key to use. In order to keep things
 358          * fast we will not generate a new key each time we allocate a
 359          * new hash table. Instead, we'll just reuse the most recently
 360          * generated one, except if we never generated one or when we
 361          * are rehashing an entire hash table because we reached a
 362          * fill level */
 363
 364         if (!current_initialized || !reuse_is_ok) {
 365                 random_bytes(current, sizeof(current));
 366                 current_initialized = true;
 367         }
 368
 369         memcpy(hash_key, current, sizeof(current));
 370 }
 371
 372 static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
 373         return (struct hashmap_base_entry*)
 374                 ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
 375 }
 376
 377 static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
 378         return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
 379 }
 380
 381 static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
 382         return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
 383 }
 384
 385 static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
 386         return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
 387 }
 388
 389 static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
 390         return &swap->e[idx - _IDX_SWAP_BEGIN];
 391 }
 392
 393 /* Returns a pointer to the bucket at index idx.
 394  * Understands real indexes and swap indexes, hence "_virtual". */
 395 static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
 396                                                     unsigned idx) {
 397         if (idx < _IDX_SWAP_BEGIN)
 398                 return bucket_at(h, idx);
 399
 400         if (idx < _IDX_SWAP_END)
 401                 return &bucket_at_swap(swap, idx)->p.b;
 402
 403         assert_not_reached("Invalid index");
 404 }
 405
 406 static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
 407         return (dib_raw_t*)
 408                 ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
 409 }
 410
 411 static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
 412         return idx >= from ? idx - from
 413                            : n_buckets(h) + idx - from;
 414 }
 415
 416 static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
 417         unsigned initial_bucket;
 418
 419         if (raw_dib == DIB_RAW_FREE)
 420                 return DIB_FREE;
 421
 422         if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
 423                 return raw_dib;
 424
 425         /*
 426          * Having an overflow DIB value is very unlikely. The hash function
 427          * would have to be bad. For example, in a table of size 2^24 filled
 428          * to load factor 0.9 the maximum observed DIB is only about 60.
 429          * In theory (assuming I used Maxima correctly), for an infinite size
 430          * hash table with load factor 0.8 the probability of a given entry
 431          * having DIB > 40 is 1.9e-8.
 432          * This returns the correct DIB value by recomputing the hash value in
 433          * the unlikely case. XXX Hitting this case could be a hint to rehash.
 434          */
 435         initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
 436         return bucket_distance(h, idx, initial_bucket);
 437 }
 438
 439 static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
 440         dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
 441 }
 442
 443 static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
 444         dib_raw_t *dibs;
 445
 446         dibs = dib_raw_ptr(h);
 447
 448         for ( ; idx < n_buckets(h); idx++)
 449                 if (dibs[idx] != DIB_RAW_FREE)
 450                         return idx;
 451
 452         return IDX_NIL;
 453 }
 454
 455 static void bucket_mark_free(HashmapBase *h, unsigned idx) {
 456         memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
 457         bucket_set_dib(h, idx, DIB_FREE);
 458 }
 459
 460 static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
 461                               unsigned from, unsigned to) {
 462         struct hashmap_base_entry *e_from, *e_to;
 463
 464         assert(from != to);
 465
 466         e_from = bucket_at_virtual(h, swap, from);
 467         e_to   = bucket_at_virtual(h, swap, to);
 468
 469         memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
 470
 471         if (h->type == HASHMAP_TYPE_ORDERED) {
 472                 OrderedHashmap *lh = (OrderedHashmap*) h;
 473                 struct ordered_hashmap_entry *le, *le_to;
 474
 475                 le_to = (struct ordered_hashmap_entry*) e_to;
 476
 477                 if (le_to->iterate_next != IDX_NIL) {
 478                         le = (struct ordered_hashmap_entry*)
 479                              bucket_at_virtual(h, swap, le_to->iterate_next);
 480                         le->iterate_previous = to;
 481                 }
 482
 483                 if (le_to->iterate_previous != IDX_NIL) {
 484                         le = (struct ordered_hashmap_entry*)
 485                              bucket_at_virtual(h, swap, le_to->iterate_previous);
 486                         le->iterate_next = to;
 487                 }
 488
 489                 if (lh->iterate_list_head == from)
 490                         lh->iterate_list_head = to;
 491                 if (lh->iterate_list_tail == from)
 492                         lh->iterate_list_tail = to;
 493         }
 494 }
 495
 496 static unsigned next_idx(HashmapBase *h, unsigned idx) {
 497         return (idx + 1U) % n_buckets(h);
 498 }
 499
 500 static unsigned prev_idx(HashmapBase *h, unsigned idx) {
 501         return (n_buckets(h) + idx - 1U) % n_buckets(h);
 502 }
 503
 504 static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
 505         switch (h->type) {
 506
 507         case HASHMAP_TYPE_PLAIN:
 508         case HASHMAP_TYPE_ORDERED:
 509                 return ((struct plain_hashmap_entry*)e)->value;
 510
 511         case HASHMAP_TYPE_SET:
 512                 return (void*) e->key;
 513
 514         default:
 515                 assert_not_reached("Unknown hashmap type");
 516         }
 517 }
 518
  519 static void base_remove_entry(HashmapBase *h, unsigned idx) {
              /* Removes the entry stored in bucket idx, which must be occupied.
               *
               * Steps: (1) locate the stop bucket following idx, (2) for ordered
               * hashmaps unlink the entry from the iteration list, (3) move every
               * entry between idx and the stop bucket one bucket backwards while
               * lowering its DIB by one, (4) free the last vacated bucket,
               * decrement the entry count and mark the hashmap dirty. */
  520         unsigned left, right, prev, dib;
  521         dib_raw_t raw_dib, *dibs;
  522
  523         dibs = dib_raw_ptr(h);
  524         assert(dibs[idx] != DIB_RAW_FREE);
  525
  526 #if ENABLE_DEBUG_HASHMAP
              /* Recorded so that iterators can tell which entry was removed. */
  527         h->debug.rem_count++;
  528         h->debug.last_rem_idx = idx;
  529 #endif
  530
  531         left = idx;
  532         /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
  533         for (right = next_idx(h, left); ; right = next_idx(h, right)) {
  534                 raw_dib = dibs[right];
  535                 if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
  536                         break;
  537
  538                 /* The buckets are not supposed to be all occupied and with DIB > 0.
  539                  * That would mean we could make everyone better off by shifting them
  540                  * backward. This scenario is impossible. */
  541                 assert(left != right);
  542         }
  543
  544         if (h->type == HASHMAP_TYPE_ORDERED) {
                      /* Unlink the entry from the doubly linked iteration list; where
                       * it has no neighbour, the list tail/head is adjusted instead. */
  545                 OrderedHashmap *lh = (OrderedHashmap*) h;
  546                 struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
  547
  548                 if (le->iterate_next != IDX_NIL)
  549                         ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
  550                 else
  551                         lh->iterate_list_tail = le->iterate_previous;
  552
  553                 if (le->iterate_previous != IDX_NIL)
  554                         ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
  555                 else
  556                         lh->iterate_list_head = le->iterate_next;
  557         }
  558
  559         /* Now shift all buckets in the interval (left, right) one step backwards */
              /* "prev" trails "left" by one bucket. Only real bucket indexes are
               * involved, hence no swap storage (NULL) is passed. */
  560         for (prev = left, left = next_idx(h, left); left != right;
  561              prev = left, left = next_idx(h, left)) {
  562                 dib = bucket_calculate_dib(h, left, dibs[left]);
  563                 assert(dib != 0);
  564                 bucket_move_entry(h, NULL, left, prev);
  565                 bucket_set_dib(h, prev, dib - 1);
  566         }
  567
              /* "prev" is now the last bucket that was vacated (idx itself when
               * nothing had to be shifted). */
  568         bucket_mark_free(h, prev);
  569         n_entries_dec(h);
  570         base_set_dirty(h);
  571 }
  572 #define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
 573
  574 static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
              /* Advances iterator i over ordered hashmap h by following the
               * iterate_next links of the entries.
               *
               * Returns the bucket index of the entry to visit now, or IDX_NIL when
               * the iteration is over. On return i->idx holds the index of the
               * entry to visit next (IDX_NIL if there is none) and i->next_key the
               * key pointer of that entry. */
  575         struct ordered_hashmap_entry *e;
  576         unsigned idx;
  577
  578         assert(h);
  579         assert(i);
  580
  581         if (i->idx == IDX_NIL)
  582                 goto at_end;
  583
              /* Fresh iterator on a hashmap with an empty iteration list. */
  584         if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
  585                 goto at_end;
  586
  587         if (i->idx == IDX_FIRST) {
  588                 idx = h->iterate_list_head;
  589                 e = ordered_bucket_at(h, idx);
  590         } else {
  591                 idx = i->idx;
  592                 e = ordered_bucket_at(h, idx);
  593                 /*
  594                  * We allow removing the current entry while iterating, but removal may cause
  595                  * a backward shift. The next entry may thus move one bucket to the left.
  596                  * To detect when it happens, we remember the key pointer of the entry we were
  597                  * going to iterate next. If it does not match, there was a backward shift.
  598                  */
  599                 if (e->p.b.key != i->next_key) {
  600                         idx = prev_idx(HASHMAP_BASE(h), idx);
  601                         e = ordered_bucket_at(h, idx);
  602                 }
  603                 assert(e->p.b.key == i->next_key);
  604         }
  605
  606 #if ENABLE_DEBUG_HASHMAP
  607         i->prev_idx = idx;
  608 #endif
  609
              /* Remember where to continue, and the key expected there. */
  610         if (e->iterate_next != IDX_NIL) {
  611                 struct ordered_hashmap_entry *n;
  612                 i->idx = e->iterate_next;
  613                 n = ordered_bucket_at(h, i->idx);
  614                 i->next_key = n->p.b.key;
  615         } else
  616                 i->idx = IDX_NIL;
  617
  618         return idx;
  619
  620 at_end:
  621         i->idx = IDX_NIL;
  622         return IDX_NIL;
  623 }
 624
 625 static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
 626         unsigned idx;
 627
 628         assert(h);
 629         assert(i);
 630
 631         if (i->idx == IDX_NIL)
 632                 goto at_end;
 633
 634         if (i->idx == IDX_FIRST) {
 635                 /* fast forward to the first occupied bucket */
 636                 if (h->has_indirect) {
 637                         i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
 638                         h->indirect.idx_lowest_entry = i->idx;
 639                 } else
 640                         i->idx = skip_free_buckets(h, 0);
 641
 642                 if (i->idx == IDX_NIL)
 643                         goto at_end;
 644         } else {
 645                 struct hashmap_base_entry *e;
 646
 647                 assert(i->idx > 0);
 648
 649                 e = bucket_at(h, i->idx);
 650                 /*
 651                  * We allow removing the current entry while iterating, but removal may cause
 652                  * a backward shift. The next entry may thus move one bucket to the left.
 653                  * To detect when it happens, we remember the key pointer of the entry we were
 654                  * going to iterate next. If it does not match, there was a backward shift.
 655                  */
 656                 if (e->key != i->next_key)
 657                         e = bucket_at(h, --i->idx);
 658
 659                 assert(e->key == i->next_key);
 660         }
 661
 662         idx = i->idx;
 663 #if ENABLE_DEBUG_HASHMAP
 664         i->prev_idx = idx;
 665 #endif
 666
 667         i->idx = skip_free_buckets(h, i->idx + 1);
 668         if (i->idx != IDX_NIL)
 669                 i->next_key = bucket_at(h, i->idx)->key;
 670         else
 671                 i->idx = IDX_NIL;
 672
 673         return idx;
 674
 675 at_end:
 676         i->idx = IDX_NIL;
 677         return IDX_NIL;
 678 }
 679
 680 static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
 681         if (!h) {
 682                 i->idx = IDX_NIL;
 683                 return IDX_NIL;
 684         }
 685
 686 #if ENABLE_DEBUG_HASHMAP
 687         if (i->idx == IDX_FIRST) {
 688                 i->put_count = h->debug.put_count;
 689                 i->rem_count = h->debug.rem_count;
 690         } else {
 691                 /* While iterating, must not add any new entries */
 692                 assert(i->put_count == h->debug.put_count);
 693                 /* ... or remove entries other than the current one */
 694                 assert(i->rem_count == h->debug.rem_count ||
 695                        (i->rem_count == h->debug.rem_count - 1 &&
 696                         i->prev_idx == h->debug.last_rem_idx));
 697                 /* Reset our removals counter */
 698                 i->rem_count = h->debug.rem_count;
 699         }
 700 #endif
 701
 702         return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
 703                                                : hashmap_iterate_in_internal_order(h, i);
 704 }
 705
 706 bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
 707         struct hashmap_base_entry *e;
 708         void *data;
 709         unsigned idx;
 710
 711         idx = hashmap_iterate_entry(h, i);
 712         if (idx == IDX_NIL) {
 713                 if (value)
 714                         *value = NULL;
 715                 if (key)
 716                         *key = NULL;
 717
 718                 return false;
 719         }
 720
 721         e = bucket_at(h, idx);
 722         data = entry_value(h, e);
 723         if (value)
 724                 *value = data;
 725         if (key)
 726                 *key = e->key;
 727
 728         return true;
 729 }
 730
 731 bool set_iterate(Set *s, Iterator *i, void **value) {
 732         return internal_hashmap_iterate(HASHMAP_BASE(s), i, value, NULL);
 733 }
 734
 735 #define HASHMAP_FOREACH_IDX(idx, h, i) \
 736         for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
 737              (idx != IDX_NIL); \
 738              (idx) = hashmap_iterate_entry((h), &(i)))
 739
 740 IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) {
 741         IteratedCache *cache;
 742
 743         assert(h);
 744         assert(!h->cached);
 745
 746         if (h->cached)
 747                 return NULL;
 748
 749         cache = new0(IteratedCache, 1);
 750         if (!cache)
 751                 return NULL;
 752
 753         cache->hashmap = h;
 754         h->cached = true;
 755
 756         return cache;
 757 }
 758
 759 static void reset_direct_storage(HashmapBase *h) {
 760         const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
 761         void *p;
 762
 763         assert(!h->has_indirect);
 764
 765         p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
 766         memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
 767 }
 768
 769 static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
 770         HashmapBase *h;
 771         const struct hashmap_type_info *hi = &hashmap_type_info[type];
 772         bool up;
 773
 774         up = mempool_enabled();
 775
 776         h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
 777         if (!h)
 778                 return NULL;
 779
 780         h->type = type;
 781         h->from_pool = up;
 782         h->hash_ops = hash_ops ?: &trivial_hash_ops;
 783
 784         if (type == HASHMAP_TYPE_ORDERED) {
 785                 OrderedHashmap *lh = (OrderedHashmap*)h;
 786                 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
 787         }
 788
 789         reset_direct_storage(h);
 790
 791         if (!shared_hash_key_initialized) {
 792                 random_bytes(shared_hash_key, sizeof(shared_hash_key));
 793                 shared_hash_key_initialized= true;
 794         }
 795
 796 #if ENABLE_DEBUG_HASHMAP
 797         h->debug.func = func;
 798         h->debug.file = file;
 799         h->debug.line = line;
 800         assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
 801         LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
 802         assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
 803 #endif
 804
 805         return h;
 806 }
 807
 808 Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 809         return (Hashmap*)        hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
 810 }
 811
 812 OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 813         return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
 814 }
 815
 816 Set *internal_set_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 817         return (Set*)            hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
 818 }
 819
 820 static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
 821                                          enum HashmapType type HASHMAP_DEBUG_PARAMS) {
 822         HashmapBase *q;
 823
 824         assert(h);
 825
 826         if (*h)
 827                 return 0;
 828
 829         q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
 830         if (!q)
 831                 return -ENOMEM;
 832
 833         *h = q;
 834         return 0;
 835 }
 836
 837 int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 838         return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
 839 }
 840
 841 int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 842         return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
 843 }
 844
 845 int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 846         return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
 847 }
 848
 849 static void hashmap_free_no_clear(HashmapBase *h) {
 850         assert(!h->has_indirect);
 851         assert(h->n_direct_entries == 0);
 852
 853 #if ENABLE_DEBUG_HASHMAP
 854         assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
 855         LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
 856         assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
 857 #endif
 858
 859         if (h->from_pool) {
 860                 /* Ensure that the object didn't get migrated between threads. */
 861                 assert_se(is_main_thread());
 862                 mempool_free_tile(hashmap_type_info[h->type].mempool, h);
 863         } else
 864                 free(h);
 865 }
 866
 867 HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
 868         if (h) {
 869                 internal_hashmap_clear(h, default_free_key, default_free_value);
 870                 hashmap_free_no_clear(h);
 871         }
 872
 873         return NULL;
 874 }
 875
 876 void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
 877         free_func_t free_key, free_value;
 878         if (!h)
 879                 return;
 880
 881         free_key = h->hash_ops->free_key ?: default_free_key;
 882         free_value = h->hash_ops->free_value ?: default_free_value;
 883
 884         if (free_key || free_value) {
 885
 886                 /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
 887                  * hash table, and only then call the destructor functions. If these destructors then try to unregister
 888                  * themselves from our hash table a second time, the entry is already gone. */
 889
 890                 while (internal_hashmap_size(h) > 0) {
 891                         void *v, *k;
 892
 893                         v = internal_hashmap_first_key_and_value(h, true, &k);
 894
 895                         if (free_key)
 896                                 free_key(k);
 897
 898                         if (free_value)
 899                                 free_value(v);
 900                 }
 901         }
 902
 903         if (h->has_indirect) {
 904                 free(h->indirect.storage);
 905                 h->has_indirect = false;
 906         }
 907
 908         h->n_direct_entries = 0;
 909         reset_direct_storage(h);
 910
 911         if (h->type == HASHMAP_TYPE_ORDERED) {
 912                 OrderedHashmap *lh = (OrderedHashmap*) h;
 913                 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
 914         }
 915
 916         base_set_dirty(h);
 917 }
 918
 919 static int resize_buckets(HashmapBase *h, unsigned entries_add);
 920
/*
 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
 * Performs Robin Hood swaps as it goes. The entry to put must be placed
 * by the caller into swap slot IDX_PUT.
 * If used for in-place resizing, may leave a displaced entry in swap slot
 * IDX_PUT. Caller must rehash it next.
 *
 * The scan stops at the first bucket whose raw DIB is DIB_RAW_FREE or
 * DIB_RAW_REHASH. 'distance' counts how far the entry currently held in
 * IDX_PUT has travelled from the bucket where its own probing started.
 * Swap slot IDX_TMP is used as scratch space while exchanging entries.
 *
 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
 *          false otherwise.
 */
static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
                                   struct swap_entries *swap) {
        dib_raw_t raw_dib, *dibs;
        unsigned dib, distance;

#if ENABLE_DEBUG_HASHMAP
        h->debug.put_count++;
#endif

        dibs = dib_raw_ptr(h);

        for (distance = 0; ; distance++) {
                raw_dib = dibs[idx];
                if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
                        /* A bucket awaiting rehash still holds an entry: park it in IDX_TMP
                         * before the bucket gets overwritten. */
                        if (raw_dib == DIB_RAW_REHASH)
                                bucket_move_entry(h, swap, idx, IDX_TMP);

                        /* Keep the lowest-occupied-index hint of indirect storage up to date. */
                        if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
                                h->indirect.idx_lowest_entry = idx;

                        bucket_set_dib(h, idx, distance);
                        bucket_move_entry(h, swap, IDX_PUT, idx);
                        if (raw_dib == DIB_RAW_REHASH) {
                                /* Hand the parked entry to the caller via IDX_PUT. */
                                bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
                                return true;
                        }

                        return false;
                }

                dib = bucket_calculate_dib(h, idx, raw_dib);

                if (dib < distance) {
                        /* Found a wealthier entry. Go Robin Hood! */
                        bucket_set_dib(h, idx, distance);

                        /* swap the entries */
                        bucket_move_entry(h, swap, idx, IDX_TMP);
                        bucket_move_entry(h, swap, IDX_PUT, idx);
                        bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);

                        /* From now on we are placing the evicted entry, which already sat
                         * 'dib' buckets away from where its probing started. */
                        distance = dib;
                }

                idx = next_idx(h, idx);
        }
}
 977
/*
 * Puts an entry into a hashmap, boldly - no check whether key already exists.
 * The caller must place the entry (only its key and value, not link indexes)
 * in swap slot IDX_PUT.
 * 'idx' is the bucket at which probing for the new entry starts; it is
 * recomputed from the key if a resize took place.
 * Caller must ensure: the key does not exist yet in the hashmap.
 *                     that resize is not needed if !may_resize.
 * Returns: 1 if entry was put successfully.
 *          -ENOMEM if may_resize==true and resize failed with -ENOMEM.
 *          Cannot return -ENOMEM if !may_resize.
 */
static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
                                   struct swap_entries *swap, bool may_resize) {
        struct ordered_hashmap_entry *new_entry;
        int r;

        assert(idx < n_buckets(h));

        new_entry = bucket_at_swap(swap, IDX_PUT);

        if (may_resize) {
                r = resize_buckets(h, 1);
                if (r < 0)
                        return r;
                /* resize_buckets() returns 1 when it actually resized, which installs a new
                 * hash key, so the caller's idx is stale and must be recalculated. */
                if (r > 0)
                        idx = bucket_hash(h, new_entry->p.b.key);
        }
        assert(n_entries(h) < n_buckets(h));

        if (h->type == HASHMAP_TYPE_ORDERED) {
                OrderedHashmap *lh = (OrderedHashmap*) h;

                /* Append the new entry to the tail of the iteration list. At this point the
                 * entry still lives in the swap slot, hence the links refer to IDX_PUT.
                 * NOTE(review): presumably bucket_move_entry() rewrites these links once the
                 * entry is moved into its real bucket - confirm against its definition. */
                new_entry->iterate_next = IDX_NIL;
                new_entry->iterate_previous = lh->iterate_list_tail;

                if (lh->iterate_list_tail != IDX_NIL) {
                        struct ordered_hashmap_entry *old_tail;

                        old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
                        assert(old_tail->iterate_next == IDX_NIL);
                        old_tail->iterate_next = IDX_PUT;
                }

                lh->iterate_list_tail = IDX_PUT;
                if (lh->iterate_list_head == IDX_NIL)
                        lh->iterate_list_head = IDX_PUT;
        }

        /* Outside of an in-place resize no bucket is marked for rehashing, so the placement
         * must not leave a displaced entry behind. */
        assert_se(hashmap_put_robin_hood(h, idx, swap) == false);

        n_entries_inc(h);
#if ENABLE_DEBUG_HASHMAP
        h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
#endif

        base_set_dirty(h);

        return 1;
}
#define hashmap_put_boldly(h, idx, swap, may_resize) \
        hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1038
1039 /*
1040  * Returns 0 if resize is not needed.
1041  *         1 if successfully resized.
1042  *         -ENOMEM on allocation failure.
1043  */
1044 static int resize_buckets(HashmapBase *h, unsigned entries_add) {
1045         struct swap_entries swap;
1046         void *new_storage;
1047         dib_raw_t *old_dibs, *new_dibs;
1048         const struct hashmap_type_info *hi;
1049         unsigned idx, optimal_idx;
1050         unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
1051         uint8_t new_shift;
1052         bool rehash_next;
1053
1054         assert(h);
1055
1056         hi = &hashmap_type_info[h->type];
1057         new_n_entries = n_entries(h) + entries_add;
1058
1059         /* overflow? */
1060         if (_unlikely_(new_n_entries < entries_add))
1061                 return -ENOMEM;
1062
1063         /* For direct storage we allow 100% load, because it's tiny. */
1064         if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
1065                 return 0;
1066
1067         /*
1068          * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1069          * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1070          */
1071         new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
1072         /* overflow? */
1073         if (_unlikely_(new_n_buckets < new_n_entries))
1074                 return -ENOMEM;
1075
1076         if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
1077                 return -ENOMEM;
1078
1079         old_n_buckets = n_buckets(h);
1080
1081         if (_likely_(new_n_buckets <= old_n_buckets))
1082                 return 0;
1083
1084         new_shift = log2u_round_up(MAX(
1085                         new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
1086                         2 * sizeof(struct direct_storage)));
1087
1088         /* Realloc storage (buckets and DIB array). */
1089         new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
1090                               1U << new_shift);
1091         if (!new_storage)
1092                 return -ENOMEM;
1093
1094         /* Must upgrade direct to indirect storage. */
1095         if (!h->has_indirect) {
1096                 memcpy(new_storage, h->direct.storage,
1097                        old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
1098                 h->indirect.n_entries = h->n_direct_entries;
1099                 h->indirect.idx_lowest_entry = 0;
1100                 h->n_direct_entries = 0;
1101         }
1102
1103         /* Get a new hash key. If we've just upgraded to indirect storage,
1104          * allow reusing a previously generated key. It's still a different key
1105          * from the shared one that we used for direct storage. */
1106         get_hash_key(h->indirect.hash_key, !h->has_indirect);
1107
1108         h->has_indirect = true;
1109         h->indirect.storage = new_storage;
1110         h->indirect.n_buckets = (1U << new_shift) /
1111                                 (hi->entry_size + sizeof(dib_raw_t));
1112
1113         old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
1114         new_dibs = dib_raw_ptr(h);
1115
1116         /*
1117          * Move the DIB array to the new place, replacing valid DIB values with
1118          * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1119          * Note: Overlap is not possible, because we have at least doubled the
1120          * number of buckets and dib_raw_t is smaller than any entry type.
1121          */
1122         for (idx = 0; idx < old_n_buckets; idx++) {
1123                 assert(old_dibs[idx] != DIB_RAW_REHASH);
1124                 new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
1125                                                               : DIB_RAW_REHASH;
1126         }
1127
1128         /* Zero the area of newly added entries (including the old DIB area) */
1129         memzero(bucket_at(h, old_n_buckets),
1130                (n_buckets(h) - old_n_buckets) * hi->entry_size);
1131
1132         /* The upper half of the new DIB array needs initialization */
1133         memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
1134                (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
1135
1136         /* Rehash entries that need it */
1137         n_rehashed = 0;
1138         for (idx = 0; idx < old_n_buckets; idx++) {
1139                 if (new_dibs[idx] != DIB_RAW_REHASH)
1140                         continue;
1141
1142                 optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
1143
1144                 /*
1145                  * Not much to do if by luck the entry hashes to its current
1146                  * location. Just set its DIB.
1147                  */
1148                 if (optimal_idx == idx) {
1149                         new_dibs[idx] = 0;
1150                         n_rehashed++;
1151                         continue;
1152                 }
1153
1154                 new_dibs[idx] = DIB_RAW_FREE;
1155                 bucket_move_entry(h, &swap, idx, IDX_PUT);
1156                 /* bucket_move_entry does not clear the source */
1157                 memzero(bucket_at(h, idx), hi->entry_size);
1158
1159                 do {
1160                         /*
1161                          * Find the new bucket for the current entry. This may make
1162                          * another entry homeless and load it into IDX_PUT.
1163                          */
1164                         rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
1165                         n_rehashed++;
1166
1167                         /* Did the current entry displace another one? */
1168                         if (rehash_next)
1169                                 optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
1170                 } while (rehash_next);
1171         }
1172
1173         assert(n_rehashed == n_entries(h));
1174
1175         return 1;
1176 }
1177
1178 /*
1179  * Finds an entry with a matching key
1180  * Returns: index of the found entry, or IDX_NIL if not found.
1181  */
1182 static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
1183         struct hashmap_base_entry *e;
1184         unsigned dib, distance;
1185         dib_raw_t *dibs = dib_raw_ptr(h);
1186
1187         assert(idx < n_buckets(h));
1188
1189         for (distance = 0; ; distance++) {
1190                 if (dibs[idx] == DIB_RAW_FREE)
1191                         return IDX_NIL;
1192
1193                 dib = bucket_calculate_dib(h, idx, dibs[idx]);
1194
1195                 if (dib < distance)
1196                         return IDX_NIL;
1197                 if (dib == distance) {
1198                         e = bucket_at(h, idx);
1199                         if (h->hash_ops->compare(e->key, key) == 0)
1200                                 return idx;
1201                 }
1202
1203                 idx = next_idx(h, idx);
1204         }
1205 }
1206 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
1207
1208 int hashmap_put(Hashmap *h, const void *key, void *value) {
1209         struct swap_entries swap;
1210         struct plain_hashmap_entry *e;
1211         unsigned hash, idx;
1212
1213         assert(h);
1214
1215         hash = bucket_hash(h, key);
1216         idx = bucket_scan(h, hash, key);
1217         if (idx != IDX_NIL) {
1218                 e = plain_bucket_at(h, idx);
1219                 if (e->value == value)
1220                         return 0;
1221                 return -EEXIST;
1222         }
1223
1224         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1225         e->b.key = key;
1226         e->value = value;
1227         return hashmap_put_boldly(h, hash, &swap, true);
1228 }
1229
1230 int set_put(Set *s, const void *key) {
1231         struct swap_entries swap;
1232         struct hashmap_base_entry *e;
1233         unsigned hash, idx;
1234
1235         assert(s);
1236
1237         hash = bucket_hash(s, key);
1238         idx = bucket_scan(s, hash, key);
1239         if (idx != IDX_NIL)
1240                 return 0;
1241
1242         e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1243         e->key = key;
1244         return hashmap_put_boldly(s, hash, &swap, true);
1245 }
1246
1247 int hashmap_replace(Hashmap *h, const void *key, void *value) {
1248         struct swap_entries swap;
1249         struct plain_hashmap_entry *e;
1250         unsigned hash, idx;
1251
1252         assert(h);
1253
1254         hash = bucket_hash(h, key);
1255         idx = bucket_scan(h, hash, key);
1256         if (idx != IDX_NIL) {
1257                 e = plain_bucket_at(h, idx);
1258 #if ENABLE_DEBUG_HASHMAP
1259                 /* Although the key is equal, the key pointer may have changed,
1260                  * and this would break our assumption for iterating. So count
1261                  * this operation as incompatible with iteration. */
1262                 if (e->b.key != key) {
1263                         h->b.debug.put_count++;
1264                         h->b.debug.rem_count++;
1265                         h->b.debug.last_rem_idx = idx;
1266                 }
1267 #endif
1268                 e->b.key = key;
1269                 e->value = value;
1270                 hashmap_set_dirty(h);
1271
1272                 return 0;
1273         }
1274
1275         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1276         e->b.key = key;
1277         e->value = value;
1278         return hashmap_put_boldly(h, hash, &swap, true);
1279 }
1280
1281 int hashmap_update(Hashmap *h, const void *key, void *value) {
1282         struct plain_hashmap_entry *e;
1283         unsigned hash, idx;
1284
1285         assert(h);
1286
1287         hash = bucket_hash(h, key);
1288         idx = bucket_scan(h, hash, key);
1289         if (idx == IDX_NIL)
1290                 return -ENOENT;
1291
1292         e = plain_bucket_at(h, idx);
1293         e->value = value;
1294         hashmap_set_dirty(h);
1295
1296         return 0;
1297 }
1298
1299 void *internal_hashmap_get(HashmapBase *h, const void *key) {
1300         struct hashmap_base_entry *e;
1301         unsigned hash, idx;
1302
1303         if (!h)
1304                 return NULL;
1305
1306         hash = bucket_hash(h, key);
1307         idx = bucket_scan(h, hash, key);
1308         if (idx == IDX_NIL)
1309                 return NULL;
1310
1311         e = bucket_at(h, idx);
1312         return entry_value(h, e);
1313 }
1314
1315 void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
1316         struct plain_hashmap_entry *e;
1317         unsigned hash, idx;
1318
1319         if (!h)
1320                 return NULL;
1321
1322         hash = bucket_hash(h, key);
1323         idx = bucket_scan(h, hash, key);
1324         if (idx == IDX_NIL)
1325                 return NULL;
1326
1327         e = plain_bucket_at(h, idx);
1328         if (key2)
1329                 *key2 = (void*) e->b.key;
1330
1331         return e->value;
1332 }
1333
1334 bool internal_hashmap_contains(HashmapBase *h, const void *key) {
1335         unsigned hash;
1336
1337         if (!h)
1338                 return false;
1339
1340         hash = bucket_hash(h, key);
1341         return bucket_scan(h, hash, key) != IDX_NIL;
1342 }
1343
1344 void *internal_hashmap_remove(HashmapBase *h, const void *key) {
1345         struct hashmap_base_entry *e;
1346         unsigned hash, idx;
1347         void *data;
1348
1349         if (!h)
1350                 return NULL;
1351
1352         hash = bucket_hash(h, key);
1353         idx = bucket_scan(h, hash, key);
1354         if (idx == IDX_NIL)
1355                 return NULL;
1356
1357         e = bucket_at(h, idx);
1358         data = entry_value(h, e);
1359         remove_entry(h, idx);
1360
1361         return data;
1362 }
1363
1364 void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
1365         struct plain_hashmap_entry *e;
1366         unsigned hash, idx;
1367         void *data;
1368
1369         if (!h) {
1370                 if (rkey)
1371                         *rkey = NULL;
1372                 return NULL;
1373         }
1374
1375         hash = bucket_hash(h, key);
1376         idx = bucket_scan(h, hash, key);
1377         if (idx == IDX_NIL) {
1378                 if (rkey)
1379                         *rkey = NULL;
1380                 return NULL;
1381         }
1382
1383         e = plain_bucket_at(h, idx);
1384         data = e->value;
1385         if (rkey)
1386                 *rkey = (void*) e->b.key;
1387
1388         remove_entry(h, idx);
1389
1390         return data;
1391 }
1392
1393 int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1394         struct swap_entries swap;
1395         struct plain_hashmap_entry *e;
1396         unsigned old_hash, new_hash, idx;
1397
1398         if (!h)
1399                 return -ENOENT;
1400
1401         old_hash = bucket_hash(h, old_key);
1402         idx = bucket_scan(h, old_hash, old_key);
1403         if (idx == IDX_NIL)
1404                 return -ENOENT;
1405
1406         new_hash = bucket_hash(h, new_key);
1407         if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
1408                 return -EEXIST;
1409
1410         remove_entry(h, idx);
1411
1412         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1413         e->b.key = new_key;
1414         e->value = value;
1415         assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1416
1417         return 0;
1418 }
1419
1420 int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
1421         struct swap_entries swap;
1422         struct hashmap_base_entry *e;
1423         unsigned old_hash, new_hash, idx;
1424
1425         if (!s)
1426                 return -ENOENT;
1427
1428         old_hash = bucket_hash(s, old_key);
1429         idx = bucket_scan(s, old_hash, old_key);
1430         if (idx == IDX_NIL)
1431                 return -ENOENT;
1432
1433         new_hash = bucket_hash(s, new_key);
1434         if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
1435                 return -EEXIST;
1436
1437         remove_entry(s, idx);
1438
1439         e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1440         e->key = new_key;
1441         assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
1442
1443         return 0;
1444 }
1445
/* Removes the entry for 'old_key' and stores new_key -> value, dropping any entry that already
 * exists for 'new_key'.
 * Returns: 0 on success, -ENOENT if the hashmap is NULL or old_key is not present. */
int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
        struct swap_entries swap;
        struct plain_hashmap_entry *e;
        unsigned old_hash, new_hash, idx_old, idx_new;

        if (!h)
                return -ENOENT;

        old_hash = bucket_hash(h, old_key);
        idx_old = bucket_scan(h, old_hash, old_key);
        if (idx_old == IDX_NIL)
                return -ENOENT;

        /* From here on track the key pointer actually stored in the hashmap; it is used below to
         * recognize the old entry again after the other removal. */
        old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;

        new_hash = bucket_hash(h, new_key);
        idx_new = bucket_scan(h, new_hash, new_key);
        if (idx_new != IDX_NIL)
                /* If both keys resolve to the same entry, the single removal below suffices. */
                if (idx_old != idx_new) {
                        remove_entry(h, idx_new);
                        /* Compensate for a possible backward shift. */
                        if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
                                idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
                        assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
                }

        remove_entry(h, idx_old);

        /* At least one entry was removed, so the insertion is done without resizing. */
        e = &bucket_at_swap(&swap, IDX_PUT)->p;
        e->b.key = new_key;
        e->value = value;
        assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);

        return 0;
}
1481
1482 void *internal_hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
1483         struct hashmap_base_entry *e;
1484         unsigned hash, idx;
1485
1486         if (!h)
1487                 return NULL;
1488
1489         hash = bucket_hash(h, key);
1490         idx = bucket_scan(h, hash, key);
1491         if (idx == IDX_NIL)
1492                 return NULL;
1493
1494         e = bucket_at(h, idx);
1495         if (entry_value(h, e) != value)
1496                 return NULL;
1497
1498         remove_entry(h, idx);
1499
1500         return value;
1501 }
1502
1503 static unsigned find_first_entry(HashmapBase *h) {
1504         Iterator i = ITERATOR_FIRST;
1505
1506         if (!h || !n_entries(h))
1507                 return IDX_NIL;
1508
1509         return hashmap_iterate_entry(h, &i);
1510 }
1511
1512 void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
1513         struct hashmap_base_entry *e;
1514         void *key, *data;
1515         unsigned idx;
1516
1517         idx = find_first_entry(h);
1518         if (idx == IDX_NIL)
1519                 return NULL;
1520
1521         e = bucket_at(h, idx);
1522         key = (void*) e->key;
1523         data = entry_value(h, e);
1524
1525         if (remove)
1526                 remove_entry(h, idx);
1527
1528         if (ret_key)
1529                 *ret_key = key;
1530
1531         return data;
1532 }
1533
1534 unsigned internal_hashmap_size(HashmapBase *h) {
1535
1536         if (!h)
1537                 return 0;
1538
1539         return n_entries(h);
1540 }
1541
1542 unsigned internal_hashmap_buckets(HashmapBase *h) {
1543
1544         if (!h)
1545                 return 0;
1546
1547         return n_buckets(h);
1548 }
1549
1550 int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
1551         Iterator i;
1552         unsigned idx;
1553
1554         assert(h);
1555
1556         HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1557                 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
1558                 int r;
1559
1560                 r = hashmap_put(h, pe->b.key, pe->value);
1561                 if (r < 0 && r != -EEXIST)
1562                         return r;
1563         }
1564
1565         return 0;
1566 }
1567
1568 int set_merge(Set *s, Set *other) {
1569         Iterator i;
1570         unsigned idx;
1571
1572         assert(s);
1573
1574         HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1575                 struct set_entry *se = set_bucket_at(other, idx);
1576                 int r;
1577
1578                 r = set_put(s, se->b.key);
1579                 if (r < 0)
1580                         return r;
1581         }
1582
1583         return 0;
1584 }
1585
1586 int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
1587         int r;
1588
1589         assert(h);
1590
1591         r = resize_buckets(h, entries_add);
1592         if (r < 0)
1593                 return r;
1594
1595         return 0;
1596 }
1597
1598 /*
1599  * The same as hashmap_merge(), but every new item from other is moved to h.
1600  * Keys already in h are skipped and stay in other.
1601  * Returns: 0 on success.
1602  *          -ENOMEM on alloc failure, in which case no move has been done.
1603  */
1604 int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
1605         struct swap_entries swap;
1606         struct hashmap_base_entry *e, *n;
1607         Iterator i;
1608         unsigned idx;
1609         int r;
1610
1611         assert(h);
1612
1613         if (!other)
1614                 return 0;
1615
1616         assert(other->type == h->type);
1617
1618         /*
1619          * This reserves buckets for the worst case, where none of other's
1620          * entries are yet present in h. This is preferable to risking
1621          * an allocation failure in the middle of the moving and having to
1622          * rollback or return a partial result.
1623          */
1624         r = resize_buckets(h, n_entries(other));
1625         if (r < 0)
1626                 return r;
1627
1628         HASHMAP_FOREACH_IDX(idx, other, i) {
1629                 unsigned h_hash;
1630
1631                 e = bucket_at(other, idx);
1632                 h_hash = bucket_hash(h, e->key);
1633                 if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
1634                         continue;
1635
1636                 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1637                 n->key = e->key;
1638                 if (h->type != HASHMAP_TYPE_SET)
1639                         ((struct plain_hashmap_entry*) n)->value =
1640                                 ((struct plain_hashmap_entry*) e)->value;
1641                 assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
1642
1643                 remove_entry(other, idx);
1644         }
1645
1646         return 0;
1647 }
1648
1649 int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
1650         struct swap_entries swap;
1651         unsigned h_hash, other_hash, idx;
1652         struct hashmap_base_entry *e, *n;
1653         int r;
1654
1655         assert(h);
1656
1657         h_hash = bucket_hash(h, key);
1658         if (bucket_scan(h, h_hash, key) != IDX_NIL)
1659                 return -EEXIST;
1660
1661         if (!other)
1662                 return -ENOENT;
1663
1664         assert(other->type == h->type);
1665
1666         other_hash = bucket_hash(other, key);
1667         idx = bucket_scan(other, other_hash, key);
1668         if (idx == IDX_NIL)
1669                 return -ENOENT;
1670
1671         e = bucket_at(other, idx);
1672
1673         n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1674         n->key = e->key;
1675         if (h->type != HASHMAP_TYPE_SET)
1676                 ((struct plain_hashmap_entry*) n)->value =
1677                         ((struct plain_hashmap_entry*) e)->value;
1678         r = hashmap_put_boldly(h, h_hash, &swap, true);
1679         if (r < 0)
1680                 return r;
1681
1682         remove_entry(other, idx);
1683         return 0;
1684 }
1685
1686 HashmapBase *internal_hashmap_copy(HashmapBase *h) {
1687         HashmapBase *copy;
1688         int r;
1689
1690         assert(h);
1691
1692         copy = hashmap_base_new(h->hash_ops, h->type  HASHMAP_DEBUG_SRC_ARGS);
1693         if (!copy)
1694                 return NULL;
1695
1696         switch (h->type) {
1697         case HASHMAP_TYPE_PLAIN:
1698         case HASHMAP_TYPE_ORDERED:
1699                 r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
1700                 break;
1701         case HASHMAP_TYPE_SET:
1702                 r = set_merge((Set*)copy, (Set*)h);
1703                 break;
1704         default:
1705                 assert_not_reached("Unknown hashmap type");
1706         }
1707
1708         if (r < 0) {
1709                 internal_hashmap_free(copy, false, false);
1710                 return NULL;
1711         }
1712
1713         return copy;
1714 }
1715
1716 char **internal_hashmap_get_strv(HashmapBase *h) {
1717         char **sv;
1718         Iterator i;
1719         unsigned idx, n;
1720
1721         sv = new(char*, n_entries(h)+1);
1722         if (!sv)
1723                 return NULL;
1724
1725         n = 0;
1726         HASHMAP_FOREACH_IDX(idx, h, i)
1727                 sv[n++] = entry_value(h, bucket_at(h, idx));
1728         sv[n] = NULL;
1729
1730         return sv;
1731 }
1732
1733 void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
1734         struct ordered_hashmap_entry *e;
1735         unsigned hash, idx;
1736
1737         if (!h)
1738                 return NULL;
1739
1740         hash = bucket_hash(h, key);
1741         idx = bucket_scan(h, hash, key);
1742         if (idx == IDX_NIL)
1743                 return NULL;
1744
1745         e = ordered_bucket_at(h, idx);
1746         if (e->iterate_next == IDX_NIL)
1747                 return NULL;
1748         return ordered_bucket_at(h, e->iterate_next)->p.value;
1749 }
1750
1751 int set_consume(Set *s, void *value) {
1752         int r;
1753
1754         assert(s);
1755         assert(value);
1756
1757         r = set_put(s, value);
1758         if (r <= 0)
1759                 free(value);
1760
1761         return r;
1762 }
1763
1764 int set_put_strdup(Set *s, const char *p) {
1765         char *c;
1766
1767         assert(s);
1768         assert(p);
1769
1770         if (set_contains(s, (char*) p))
1771                 return 0;
1772
1773         c = strdup(p);
1774         if (!c)
1775                 return -ENOMEM;
1776
1777         return set_consume(s, c);
1778 }
1779
1780 int set_put_strdupv(Set *s, char **l) {
1781         int n = 0, r;
1782         char **i;
1783
1784         assert(s);
1785
1786         STRV_FOREACH(i, l) {
1787                 r = set_put_strdup(s, *i);
1788                 if (r < 0)
1789                         return r;
1790
1791                 n += r;
1792         }
1793
1794         return n;
1795 }
1796
1797 int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
1798         const char *p = v;
1799         int r;
1800
1801         assert(s);
1802         assert(v);
1803
1804         for (;;) {
1805                 char *word;
1806
1807                 r = extract_first_word(&p, &word, separators, flags);
1808                 if (r <= 0)
1809                         return r;
1810
1811                 r = set_consume(s, word);
1812                 if (r < 0)
1813                         return r;
1814         }
1815 }
1816
1817 /* expand the cachemem if needed, return true if newly (re)activated. */
1818 static int cachemem_maintain(CacheMem *mem, unsigned size) {
1819         assert(mem);
1820
1821         if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) {
1822                 if (size > 0)
1823                         return -ENOMEM;
1824         }
1825
1826         if (!mem->active) {
1827                 mem->active = true;
1828                 return true;
1829         }
1830
1831         return false;
1832 }
1833
1834 int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
1835         bool sync_keys = false, sync_values = false;
1836         unsigned size;
1837         int r;
1838
1839         assert(cache);
1840         assert(cache->hashmap);
1841
1842         size = n_entries(cache->hashmap);
1843
1844         if (res_keys) {
1845                 r = cachemem_maintain(&cache->keys, size);
1846                 if (r < 0)
1847                         return r;
1848
1849                 sync_keys = r;
1850         } else
1851                 cache->keys.active = false;
1852
1853         if (res_values) {
1854                 r = cachemem_maintain(&cache->values, size);
1855                 if (r < 0)
1856                         return r;
1857
1858                 sync_values = r;
1859         } else
1860                 cache->values.active = false;
1861
1862         if (cache->hashmap->dirty) {
1863                 if (cache->keys.active)
1864                         sync_keys = true;
1865                 if (cache->values.active)
1866                         sync_values = true;
1867
1868                 cache->hashmap->dirty = false;
1869         }
1870
1871         if (sync_keys || sync_values) {
1872                 unsigned i, idx;
1873                 Iterator iter;
1874
1875                 i = 0;
1876                 HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
1877                         struct hashmap_base_entry *e;
1878
1879                         e = bucket_at(cache->hashmap, idx);
1880
1881                         if (sync_keys)
1882                                 cache->keys.ptr[i] = e->key;
1883                         if (sync_values)
1884                                 cache->values.ptr[i] = entry_value(cache->hashmap, e);
1885                         i++;
1886                 }
1887         }
1888
1889         if (res_keys)
1890                 *res_keys = cache->keys.ptr;
1891         if (res_values)
1892                 *res_values = cache->values.ptr;
1893         if (res_n_entries)
1894                 *res_n_entries = size;
1895
1896         return 0;
1897 }
1898
1899 IteratedCache *iterated_cache_free(IteratedCache *cache) {
1900         if (cache) {
1901                 free(cache->keys.ptr);
1902                 free(cache->values.ptr);
1903                 free(cache);
1904         }
1905
1906         return NULL;
1907 }