src/basic/hashmap.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6 #include <string.h>
   7
   8 #include "alloc-util.h"
   9 #include "fileio.h"
  10 #include "hashmap.h"
  11 #include "macro.h"
  12 #include "mempool.h"
  13 #include "process-util.h"
  14 #include "random-util.h"
  15 #include "set.h"
  16 #include "siphash24.h"
  17 #include "string-util.h"
  18 #include "strv.h"
  19 #include "util.h"
  20
  21 #if ENABLE_DEBUG_HASHMAP
  22 #include <pthread.h>
  23 #include "list.h"
  24 #endif
  25
  26 /*
  27  * Implementation of hashmaps.
  28  * Addressing: open
  29  *   - uses less RAM compared to closed addressing (chaining), because
  30  *     our entries are small (especially in Sets, which tend to contain
  31  *     the majority of entries in systemd).
  32  * Collision resolution: Robin Hood
  33  *   - tends to equalize displacement of entries from their optimal buckets.
  34  * Probe sequence: linear
  35  *   - though theoretically worse than random probing/uniform hashing/double
  36  *     hashing, it is good for cache locality.
  37  *
  38  * References:
  39  * Celis, P. 1986. Robin Hood Hashing.
  40  * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
  41  * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
  42  * - The results are derived for random probing. Suggests deletion with
  43  *   tombstones and two mean-centered search methods. None of that works
  44  *   well for linear probing.
  45  *
  46  * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
  47  * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
  48  * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
  49  * http://www.math.uu.se/~svante/papers/sj157.pdf
  50  * - Applies to Robin Hood with linear probing. Contains remarks on
  51  *   the unsuitability of mean-centered search with linear probing.
  52  *
  53  * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
  54  * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
  55  * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
  56  * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
  57  *   in a successful search), and Janson writes about displacement. C = d + 1.
  58  *
  59  * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
  60  * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
  61  * - Explanation of backward shift deletion with pictures.
  62  *
  63  * Khuong, P. 2013. The Other Robin Hood Hashing.
  64  * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
  65  * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
  66  */
  67
  68 /*
  69  * XXX Ideas for improvement:
  70  * For unordered hashmaps, randomize iteration order, similarly to Perl:
  71  * http://blog.booking.com/hardening-perls-hash-function.html
  72  */
  73
/* Inverse of the fraction of buckets that must stay free:
 * INV_KEEP_FREE = 1 / (1 - max_load_factor)
 * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free.
 * NOTE(review): the code that consumes this constant is not part of this excerpt. */
#define INV_KEEP_FREE            5U
  77
/* Fields common to entries of all hashmap/set types.
 * Every specific entry struct below starts with this one, so any entry can be
 * accessed through a struct hashmap_base_entry pointer to read its key. */
struct hashmap_base_entry {
        const void *key; /* the key of this entry */
};
  82
/* Entry types for specific hashmap/set types
 * hashmap_base_entry must be at the beginning of each entry struct. */

/* Entry of a plain (unordered) hashmap: a key plus the value mapped to it. */
struct plain_hashmap_entry {
        struct hashmap_base_entry b; /* common part (key); must stay first */
        void *value;                 /* the value stored for the key */
};
  90
/* Entry of an ordered hashmap: a plain entry that is additionally linked into
 * a doubly linked list. The links are bucket indexes rather than pointers;
 * IDX_NIL marks the end of the list in either direction. */
struct ordered_hashmap_entry {
        struct plain_hashmap_entry p;            /* key and value; must stay first */
        unsigned iterate_next, iterate_previous; /* bucket indexes of the list neighbours */
};
  95
/* Entry of a set: only the key is stored. entry_value() reports the key
 * itself as the "value" of a set entry. */
struct set_entry {
        struct hashmap_base_entry b; /* common part (key) */
};
  99
/* In several functions it is advantageous to have the hash table extended
 * virtually by a couple of additional buckets. We reserve special index values
 * for these "swap" buckets.
 * The swap indexes form the half-open range [_IDX_SWAP_BEGIN, _IDX_SWAP_END),
 * i.e. IDX_PUT == UINT_MAX - 3 and IDX_TMP == UINT_MAX - 2. */
#define _IDX_SWAP_BEGIN     (UINT_MAX - 3)
#define IDX_PUT             (_IDX_SWAP_BEGIN + 0)
#define IDX_TMP             (_IDX_SWAP_BEGIN + 1)
#define _IDX_SWAP_END       (_IDX_SWAP_BEGIN + 2)

#define IDX_FIRST           (UINT_MAX - 1) /* special index for freshly initialized iterators */
#define IDX_NIL             UINT_MAX       /* special index value meaning "none" or "end" */

/* The swap range must end exactly where the iterator sentinels begin, and
 * IDX_FIRST must agree with _IDX_ITERATOR_FIRST (defined outside this file). */
assert_cc(IDX_FIRST == _IDX_SWAP_END);
assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
 113
/* Storage space for the "swap" buckets.
 * All entry types can fit into an ordered_hashmap_entry.
 * One slot exists per swap index, i.e. e[IDX_PUT - _IDX_SWAP_BEGIN] and
 * e[IDX_TMP - _IDX_SWAP_BEGIN]; see bucket_at_swap(). */
struct swap_entries {
        struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
};
 119
/* Distance from Initial Bucket
 * Each bucket has one dib_raw_t byte. Values below DIB_RAW_OVERFLOW are the
 * distance itself; the three highest values are reserved markers. */
typedef uint8_t dib_raw_t;
#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU)   /* indicates DIB value is greater than representable */
#define DIB_RAW_REHASH   ((dib_raw_t)0xfeU)   /* entry yet to be rehashed during in-place resize */
#define DIB_RAW_FREE     ((dib_raw_t)0xffU)   /* a free bucket */
#define DIB_RAW_INIT     ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */

/* Non-raw (unsigned) DIB value for a free bucket; bucket_calculate_dib()
 * returns it for DIB_RAW_FREE and bucket_set_dib() maps it back. */
#define DIB_FREE UINT_MAX
 128
#if ENABLE_DEBUG_HASHMAP
/* Per-hashmap bookkeeping that only exists in debug builds. It is embedded
 * into HashmapBase through HASHMAP_DEBUG_FIELDS. */
struct hashmap_debug_info {
        LIST_FIELDS(struct hashmap_debug_info, debug_list);
        unsigned max_entries;  /* high watermark of n_entries */

        /* who allocated this hashmap */
        int line;
        const char *file;
        const char *func;

        /* fields to detect modification while iterating */
        unsigned put_count;    /* counts puts into the hashmap */
        unsigned rem_count;    /* counts removals from hashmap */
        unsigned last_rem_idx; /* remembers last removal index */
};

/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
/* Taken around modifications of hashmap_debug_list. */
static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;

#define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;

#else /* !ENABLE_DEBUG_HASHMAP */
/* Expands to nothing, so HashmapBase carries no debug info in regular builds. */
#define HASHMAP_DEBUG_FIELDS
#endif /* ENABLE_DEBUG_HASHMAP */
 154
/* The kinds of hashmap implemented in this file. The value is stored in the
 * 2-bit HashmapBase.type field and indexes the hashmap_type_info[] table. */
enum HashmapType {
        HASHMAP_TYPE_PLAIN,   /* entries are struct plain_hashmap_entry */
        HASHMAP_TYPE_ORDERED, /* entries are struct ordered_hashmap_entry */
        HASHMAP_TYPE_SET,     /* entries are struct set_entry */
        _HASHMAP_TYPE_MAX     /* number of types; not a valid type itself */
};
 161
/* Bookkeeping for a hashmap whose buckets live outside of HashmapBase
 * (HashmapBase.has_indirect is set). The struct is packed; its size also
 * determines how much room direct storage gets (see struct direct_storage). */
struct _packed_ indirect_storage {
        void *storage;                     /* where buckets and DIBs are stored */
        uint8_t  hash_key[HASH_KEY_SIZE];  /* hash key; changes during resize */

        unsigned n_entries;                /* number of stored entries */
        unsigned n_buckets;                /* number of buckets */

        unsigned idx_lowest_entry;         /* Index below which all buckets are free.
                                              Makes "while(hashmap_steal_first())" loops
                                              O(n) instead of O(n^2) for unordered hashmaps. */
        uint8_t  _pad[3];                  /* padding for the whole HashmapBase */
        /* The bitfields in HashmapBase complete the alignment of the whole thing. */
};
 175
/* Storage for the few entries that are kept inside HashmapBase itself
 * (HashmapBase.has_indirect is unset). It overlays struct indirect_storage in
 * a union and therefore has exactly the same size. */
struct direct_storage {
        /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
         * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
         *              or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
        uint8_t storage[sizeof(struct indirect_storage)];
};
 182
/* Number of buckets of the given entry type that fit into direct storage.
 * Each bucket costs the entry itself plus one DIB byte. */
#define DIRECT_BUCKETS(entry_t) \
        (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))

/* We should be able to store at least one entry directly. */
assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);

/* We have 3 bits for n_direct_entries. */
assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
 191
/* Hashmaps with directly stored entries all use this shared hash key.
 * It's no big deal if the key is guessed, because there can be only
 * a handful of directly stored entries in a hashmap. When a hashmap
 * outgrows direct storage, it gets its own key for indirect storage.
 * The key is filled with random bytes the first time hashmap_base_new()
 * runs; shared_hash_key_initialized records that this has happened. */
static uint8_t shared_hash_key[HASH_KEY_SIZE];
static bool shared_hash_key_initialized;
 198
/* Fields that all hashmap/set types must have.
 * The anonymous union holds either the indirect storage bookkeeping or the
 * directly stored buckets; has_indirect tells which member is active. Use the
 * accessors n_buckets(), n_entries(), storage_ptr() and hash_key() rather
 * than touching the union members directly. */
struct HashmapBase {
        const struct hash_ops *hash_ops;  /* hash and compare ops to use */

        union _packed_ {
                struct indirect_storage indirect; /* if  has_indirect */
                struct direct_storage direct;     /* if !has_indirect */
        };

        enum HashmapType type:2;     /* HASHMAP_TYPE_* */
        bool has_indirect:1;         /* whether indirect storage is used */
        unsigned n_direct_entries:3; /* Number of entries in direct storage.
                                      * Only valid if !has_indirect. */
        bool from_pool:1;            /* whether was allocated from mempool */
        bool dirty:1;                /* whether dirtied since last iterated_cache_get() */
        bool cached:1;               /* whether this hashmap is being cached */
        HASHMAP_DEBUG_FIELDS         /* optional hashmap_debug_info */
};
 217
/* Specific hash types
 * HashmapBase must be at the beginning of each hashmap struct. */

/* Plain hashmap: nothing beyond the common base. */
struct Hashmap {
        struct HashmapBase b;
};

/* Ordered hashmap: additionally remembers the bucket indexes of the first and
 * last entry of the insertion-order list (IDX_NIL while the list is empty). */
struct OrderedHashmap {
        struct HashmapBase b;
        unsigned iterate_list_head, iterate_list_tail;
};

/* Set: nothing beyond the common base. */
struct Set {
        struct HashmapBase b;
};
 233
/* One array of cached pointers used by IteratedCache.
 * NOTE(review): the code that fills and reads these fields is outside this
 * excerpt; the field descriptions below follow the field names only. */
typedef struct CacheMem {
        const void **ptr;                /* the pointer array */
        size_t n_populated, n_allocated; /* presumably: used and allocated element counts */
        bool active:1;                   /* presumably: whether this array is in use */
} CacheMem;
 239
/* Cache object attached to one hashmap; created by
 * internal_hashmap_iterated_cache_new(), which also marks the hashmap as cached. */
struct IteratedCache {
        HashmapBase *hashmap;  /* the hashmap this cache belongs to */
        CacheMem keys, values; /* separate pointer arrays for keys and for values */
};
 244
/* Memory pools the hashmap heads are allocated from when mempool use is enabled
 * (see hashmap_base_new()). NOTE(review): the meaning of the third argument (8)
 * is defined by DEFINE_MEMPOOL in mempool.h, not visible here. */
DEFINE_MEMPOOL(hashmap_pool,         Hashmap,        8);
DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
/* No need for a separate Set pool */
assert_cc(sizeof(Hashmap) == sizeof(Set));
 249
/* Static per-type parameters, looked up through hashmap_type_info[type]. */
struct hashmap_type_info {
        size_t head_size;          /* sizeof() of the hashmap head struct */
        size_t entry_size;         /* sizeof() of one entry (bucket) */
        struct mempool *mempool;   /* pool the head is allocated from */
        unsigned n_direct_buckets; /* number of buckets that fit into direct storage */
};
 256
/* Parameter table indexed by enum HashmapType. Sets share the pool of plain
 * hashmaps, which works because sizeof(Hashmap) == sizeof(Set) is asserted
 * where the pools are defined. */
static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
        [HASHMAP_TYPE_PLAIN] = {
                .head_size        = sizeof(Hashmap),
                .entry_size       = sizeof(struct plain_hashmap_entry),
                .mempool          = &hashmap_pool,
                .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
        },
        [HASHMAP_TYPE_ORDERED] = {
                .head_size        = sizeof(OrderedHashmap),
                .entry_size       = sizeof(struct ordered_hashmap_entry),
                .mempool          = &ordered_hashmap_pool,
                .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
        },
        [HASHMAP_TYPE_SET] = {
                .head_size        = sizeof(Set),
                .entry_size       = sizeof(struct set_entry),
                .mempool          = &hashmap_pool,
                .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
        },
};
 277
 278 #if VALGRIND
 279 _destructor_ static void cleanup_pools(void) {
 280         _cleanup_free_ char *t = NULL;
 281         int r;
 282
 283         /* Be nice to valgrind */
 284
 285         /* The pool is only allocated by the main thread, but the memory can
 286          * be passed to other threads. Let's clean up if we are the main thread
 287          * and no other threads are live. */
 288         if (!is_main_thread())
 289                 return;
 290
 291         r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t);
 292         if (r < 0 || !streq(t, "1"))
 293                 return;
 294
 295         mempool_drop(&hashmap_pool);
 296         mempool_drop(&ordered_hashmap_pool);
 297 }
 298 #endif
 299
 300 static unsigned n_buckets(HashmapBase *h) {
 301         return h->has_indirect ? h->indirect.n_buckets
 302                                : hashmap_type_info[h->type].n_direct_buckets;
 303 }
 304
 305 static unsigned n_entries(HashmapBase *h) {
 306         return h->has_indirect ? h->indirect.n_entries
 307                                : h->n_direct_entries;
 308 }
 309
 310 static void n_entries_inc(HashmapBase *h) {
 311         if (h->has_indirect)
 312                 h->indirect.n_entries++;
 313         else
 314                 h->n_direct_entries++;
 315 }
 316
 317 static void n_entries_dec(HashmapBase *h) {
 318         if (h->has_indirect)
 319                 h->indirect.n_entries--;
 320         else
 321                 h->n_direct_entries--;
 322 }
 323
 324 static void *storage_ptr(HashmapBase *h) {
 325         return h->has_indirect ? h->indirect.storage
 326                                : h->direct.storage;
 327 }
 328
 329 static uint8_t *hash_key(HashmapBase *h) {
 330         return h->has_indirect ? h->indirect.hash_key
 331                                : shared_hash_key;
 332 }
 333
 334 static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
 335         struct siphash state;
 336         uint64_t hash;
 337
 338         siphash24_init(&state, hash_key(h));
 339
 340         h->hash_ops->hash(p, &state);
 341
 342         hash = siphash24_finalize(&state);
 343
 344         return (unsigned) (hash % n_buckets(h));
 345 }
 346 #define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
 347
 348 static inline void base_set_dirty(HashmapBase *h) {
 349         h->dirty = true;
 350 }
 351 #define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
 352
 353 static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
 354         static uint8_t current[HASH_KEY_SIZE];
 355         static bool current_initialized = false;
 356
 357         /* Returns a hash function key to use. In order to keep things
 358          * fast we will not generate a new key each time we allocate a
 359          * new hash table. Instead, we'll just reuse the most recently
 360          * generated one, except if we never generated one or when we
 361          * are rehashing an entire hash table because we reached a
 362          * fill level */
 363
 364         if (!current_initialized || !reuse_is_ok) {
 365                 random_bytes(current, sizeof(current));
 366                 current_initialized = true;
 367         }
 368
 369         memcpy(hash_key, current, sizeof(current));
 370 }
 371
 372 static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
 373         return (struct hashmap_base_entry*)
 374                 ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
 375 }
 376
 377 static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
 378         return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
 379 }
 380
 381 static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
 382         return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
 383 }
 384
 385 static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
 386         return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
 387 }
 388
 389 static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
 390         return &swap->e[idx - _IDX_SWAP_BEGIN];
 391 }
 392
 393 /* Returns a pointer to the bucket at index idx.
 394  * Understands real indexes and swap indexes, hence "_virtual". */
 395 static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
 396                                                     unsigned idx) {
 397         if (idx < _IDX_SWAP_BEGIN)
 398                 return bucket_at(h, idx);
 399
 400         if (idx < _IDX_SWAP_END)
 401                 return &bucket_at_swap(swap, idx)->p.b;
 402
 403         assert_not_reached("Invalid index");
 404 }
 405
 406 static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
 407         return (dib_raw_t*)
 408                 ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
 409 }
 410
 411 static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
 412         return idx >= from ? idx - from
 413                            : n_buckets(h) + idx - from;
 414 }
 415
 416 static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
 417         unsigned initial_bucket;
 418
 419         if (raw_dib == DIB_RAW_FREE)
 420                 return DIB_FREE;
 421
 422         if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
 423                 return raw_dib;
 424
 425         /*
 426          * Having an overflow DIB value is very unlikely. The hash function
 427          * would have to be bad. For example, in a table of size 2^24 filled
 428          * to load factor 0.9 the maximum observed DIB is only about 60.
 429          * In theory (assuming I used Maxima correctly), for an infinite size
 430          * hash table with load factor 0.8 the probability of a given entry
 431          * having DIB > 40 is 1.9e-8.
 432          * This returns the correct DIB value by recomputing the hash value in
 433          * the unlikely case. XXX Hitting this case could be a hint to rehash.
 434          */
 435         initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
 436         return bucket_distance(h, idx, initial_bucket);
 437 }
 438
 439 static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
 440         dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
 441 }
 442
 443 static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
 444         dib_raw_t *dibs;
 445
 446         dibs = dib_raw_ptr(h);
 447
 448         for ( ; idx < n_buckets(h); idx++)
 449                 if (dibs[idx] != DIB_RAW_FREE)
 450                         return idx;
 451
 452         return IDX_NIL;
 453 }
 454
 455 static void bucket_mark_free(HashmapBase *h, unsigned idx) {
 456         memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
 457         bucket_set_dib(h, idx, DIB_FREE);
 458 }
 459
/* Copies the entry stored in bucket "from" into bucket "to". Both indexes may
 * be real bucket indexes or swap indexes (see bucket_at_virtual()).
 *
 * Only the entry itself is copied; the DIB bytes are not touched and the source
 * bucket is left as it was. For ordered hashmaps the insertion-order list is
 * patched so that the neighbours and the list head/tail refer to the new
 * location.
 *
 * swap is only dereferenced for swap indexes; base_remove_entry() passes NULL
 * because it moves between real buckets only. */
static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
                              unsigned from, unsigned to) {
        struct hashmap_base_entry *e_from, *e_to;

        assert(from != to);

        e_from = bucket_at_virtual(h, swap, from);
        e_to   = bucket_at_virtual(h, swap, to);

        memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);

        if (h->type == HASHMAP_TYPE_ORDERED) {
                OrderedHashmap *lh = (OrderedHashmap*) h;
                struct ordered_hashmap_entry *le, *le_to;

                le_to = (struct ordered_hashmap_entry*) e_to;

                /* Make the successor point back at the new location. */
                if (le_to->iterate_next != IDX_NIL) {
                        le = (struct ordered_hashmap_entry*)
                             bucket_at_virtual(h, swap, le_to->iterate_next);
                        le->iterate_previous = to;
                }

                /* Make the predecessor point forward at the new location. */
                if (le_to->iterate_previous != IDX_NIL) {
                        le = (struct ordered_hashmap_entry*)
                             bucket_at_virtual(h, swap, le_to->iterate_previous);
                        le->iterate_next = to;
                }

                /* The moved entry may have been the first and/or last of the list. */
                if (lh->iterate_list_head == from)
                        lh->iterate_list_head = to;
                if (lh->iterate_list_tail == from)
                        lh->iterate_list_tail = to;
        }
}
 495
 496 static unsigned next_idx(HashmapBase *h, unsigned idx) {
 497         return (idx + 1U) % n_buckets(h);
 498 }
 499
 500 static unsigned prev_idx(HashmapBase *h, unsigned idx) {
 501         return (n_buckets(h) + idx - 1U) % n_buckets(h);
 502 }
 503
 504 static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
 505         switch (h->type) {
 506
 507         case HASHMAP_TYPE_PLAIN:
 508         case HASHMAP_TYPE_ORDERED:
 509                 return ((struct plain_hashmap_entry*)e)->value;
 510
 511         case HASHMAP_TYPE_SET:
 512                 return (void*) e->key;
 513
 514         default:
 515                 assert_not_reached("Unknown hashmap type");
 516         }
 517 }
 518
/* Removes the entry stored in bucket idx of h using backward shift deletion:
 * the entries that follow idx, up to the next bucket that is free or holds an
 * entry in its initial bucket (DIB == 0), are each moved one bucket backwards
 * and get their DIB reduced by one. The last bucket vacated this way is marked
 * free.
 *
 * For ordered hashmaps the entry is unlinked from the insertion-order list
 * first. The entry count is decremented and the hashmap is flagged dirty.
 * The bucket at idx must be occupied. */
static void base_remove_entry(HashmapBase *h, unsigned idx) {
        unsigned left, right, prev, dib;
        dib_raw_t raw_dib, *dibs;

        dibs = dib_raw_ptr(h);
        assert(dibs[idx] != DIB_RAW_FREE);

#if ENABLE_DEBUG_HASHMAP
        /* Recorded so that hashmap_iterate_entry() can detect illegal
         * modifications while iterating. */
        h->debug.rem_count++;
        h->debug.last_rem_idx = idx;
#endif

        left = idx;
        /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
        for (right = next_idx(h, left); ; right = next_idx(h, right)) {
                raw_dib = dibs[right];
                if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
                        break;

                /* The buckets are not supposed to be all occupied and with DIB > 0.
                 * That would mean we could make everyone better off by shifting them
                 * backward. This scenario is impossible. */
                assert(left != right);
        }

        if (h->type == HASHMAP_TYPE_ORDERED) {
                OrderedHashmap *lh = (OrderedHashmap*) h;
                struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);

                /* Unlink the entry; this must happen before the shifting below,
                 * which overwrites the bucket at idx. */
                if (le->iterate_next != IDX_NIL)
                        ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
                else
                        lh->iterate_list_tail = le->iterate_previous;

                if (le->iterate_previous != IDX_NIL)
                        ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
                else
                        lh->iterate_list_head = le->iterate_next;
        }

        /* Now shift all buckets in the interval (left, right) one step backwards */
        for (prev = left, left = next_idx(h, left); left != right;
             prev = left, left = next_idx(h, left)) {
                dib = bucket_calculate_dib(h, left, dibs[left]);
                assert(dib != 0);
                bucket_move_entry(h, NULL, left, prev);
                bucket_set_dib(h, prev, dib - 1);
        }

        /* "prev" is now the bucket just before "right": either idx itself (nothing
         * was shifted) or the old location of the last shifted entry. */
        bucket_mark_free(h, prev);
        n_entries_dec(h);
        base_set_dirty(h);
}
/* Same, for any of the specific hashmap types. */
#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
 573
/* Advances iterator i over ordered hashmap h by following the insertion-order
 * list. Returns the bucket index of the entry to visit now, or IDX_NIL when
 * the iteration is over.
 *
 * On return i->idx holds the bucket index of the entry to visit on the next
 * call (IDX_NIL if there is none) and i->next_key holds that entry's key. */
static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
        struct ordered_hashmap_entry *e;
        unsigned idx;

        assert(h);
        assert(i);

        if (i->idx == IDX_NIL)
                goto at_end;

        /* Fresh iterator on an empty hashmap. */
        if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
                goto at_end;

        if (i->idx == IDX_FIRST) {
                idx = h->iterate_list_head;
                e = ordered_bucket_at(h, idx);
        } else {
                idx = i->idx;
                e = ordered_bucket_at(h, idx);
                /*
                 * We allow removing the current entry while iterating, but removal may cause
                 * a backward shift. The next entry may thus move one bucket to the left.
                 * To detect when it happens, we remember the key pointer of the entry we were
                 * going to iterate next. If it does not match, there was a backward shift.
                 */
                if (e->p.b.key != i->next_key) {
                        idx = prev_idx(HASHMAP_BASE(h), idx);
                        e = ordered_bucket_at(h, idx);
                }
                assert(e->p.b.key == i->next_key);
        }

#if ENABLE_DEBUG_HASHMAP
        /* Remembered for the modification checks in hashmap_iterate_entry(). */
        i->prev_idx = idx;
#endif

        /* Prepare the iterator for the next call. */
        if (e->iterate_next != IDX_NIL) {
                struct ordered_hashmap_entry *n;
                i->idx = e->iterate_next;
                n = ordered_bucket_at(h, i->idx);
                i->next_key = n->p.b.key;
        } else
                i->idx = IDX_NIL;

        return idx;

at_end:
        i->idx = IDX_NIL;
        return IDX_NIL;
}
 624
 625 static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
 626         unsigned idx;
 627
 628         assert(h);
 629         assert(i);
 630
 631         if (i->idx == IDX_NIL)
 632                 goto at_end;
 633
 634         if (i->idx == IDX_FIRST) {
 635                 /* fast forward to the first occupied bucket */
 636                 if (h->has_indirect) {
 637                         i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
 638                         h->indirect.idx_lowest_entry = i->idx;
 639                 } else
 640                         i->idx = skip_free_buckets(h, 0);
 641
 642                 if (i->idx == IDX_NIL)
 643                         goto at_end;
 644         } else {
 645                 struct hashmap_base_entry *e;
 646
 647                 assert(i->idx > 0);
 648
 649                 e = bucket_at(h, i->idx);
 650                 /*
 651                  * We allow removing the current entry while iterating, but removal may cause
 652                  * a backward shift. The next entry may thus move one bucket to the left.
 653                  * To detect when it happens, we remember the key pointer of the entry we were
 654                  * going to iterate next. If it does not match, there was a backward shift.
 655                  */
 656                 if (e->key != i->next_key)
 657                         e = bucket_at(h, --i->idx);
 658
 659                 assert(e->key == i->next_key);
 660         }
 661
 662         idx = i->idx;
 663 #if ENABLE_DEBUG_HASHMAP
 664         i->prev_idx = idx;
 665 #endif
 666
 667         i->idx = skip_free_buckets(h, i->idx + 1);
 668         if (i->idx != IDX_NIL)
 669                 i->next_key = bucket_at(h, i->idx)->key;
 670         else
 671                 i->idx = IDX_NIL;
 672
 673         return idx;
 674
 675 at_end:
 676         i->idx = IDX_NIL;
 677         return IDX_NIL;
 678 }
 679
 680 static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
 681         if (!h) {
 682                 i->idx = IDX_NIL;
 683                 return IDX_NIL;
 684         }
 685
 686 #if ENABLE_DEBUG_HASHMAP
 687         if (i->idx == IDX_FIRST) {
 688                 i->put_count = h->debug.put_count;
 689                 i->rem_count = h->debug.rem_count;
 690         } else {
 691                 /* While iterating, must not add any new entries */
 692                 assert(i->put_count == h->debug.put_count);
 693                 /* ... or remove entries other than the current one */
 694                 assert(i->rem_count == h->debug.rem_count ||
 695                        (i->rem_count == h->debug.rem_count - 1 &&
 696                         i->prev_idx == h->debug.last_rem_idx));
 697                 /* Reset our removals counter */
 698                 i->rem_count = h->debug.rem_count;
 699         }
 700 #endif
 701
 702         return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
 703                                                : hashmap_iterate_in_internal_order(h, i);
 704 }
 705
 706 bool internal_hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
 707         struct hashmap_base_entry *e;
 708         void *data;
 709         unsigned idx;
 710
 711         idx = hashmap_iterate_entry(h, i);
 712         if (idx == IDX_NIL) {
 713                 if (value)
 714                         *value = NULL;
 715                 if (key)
 716                         *key = NULL;
 717
 718                 return false;
 719         }
 720
 721         e = bucket_at(h, idx);
 722         data = entry_value(h, e);
 723         if (value)
 724                 *value = data;
 725         if (key)
 726                 *key = e->key;
 727
 728         return true;
 729 }
 730
 731 bool set_iterate(Set *s, Iterator *i, void **value) {
 732         return internal_hashmap_iterate(HASHMAP_BASE(s), i, value, NULL);
 733 }
 734
 735 #define HASHMAP_FOREACH_IDX(idx, h, i) \
 736         for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
 737              (idx != IDX_NIL); \
 738              (idx) = hashmap_iterate_entry((h), &(i)))
 739
 740 IteratedCache *internal_hashmap_iterated_cache_new(HashmapBase *h) {
 741         IteratedCache *cache;
 742
 743         assert(h);
 744         assert(!h->cached);
 745
 746         if (h->cached)
 747                 return NULL;
 748
 749         cache = new0(IteratedCache, 1);
 750         if (!cache)
 751                 return NULL;
 752
 753         cache->hashmap = h;
 754         h->cached = true;
 755
 756         return cache;
 757 }
 758
 759 static void reset_direct_storage(HashmapBase *h) {
 760         const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
 761         void *p;
 762
 763         assert(!h->has_indirect);
 764
 765         p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
 766         memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
 767 }
 768
 769 static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
 770         HashmapBase *h;
 771         const struct hashmap_type_info *hi = &hashmap_type_info[type];
 772         bool up;
 773
 774         up = mempool_enabled();
 775
 776         h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
 777         if (!h)
 778                 return NULL;
 779
 780         h->type = type;
 781         h->from_pool = up;
 782         h->hash_ops = hash_ops ? hash_ops : &trivial_hash_ops;
 783
 784         if (type == HASHMAP_TYPE_ORDERED) {
 785                 OrderedHashmap *lh = (OrderedHashmap*)h;
 786                 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
 787         }
 788
 789         reset_direct_storage(h);
 790
 791         if (!shared_hash_key_initialized) {
 792                 random_bytes(shared_hash_key, sizeof(shared_hash_key));
 793                 shared_hash_key_initialized= true;
 794         }
 795
 796 #if ENABLE_DEBUG_HASHMAP
 797         h->debug.func = func;
 798         h->debug.file = file;
 799         h->debug.line = line;
 800         assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
 801         LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
 802         assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
 803 #endif
 804
 805         return h;
 806 }
 807
 808 Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 809         return (Hashmap*)        hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
 810 }
 811
 812 OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 813         return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
 814 }
 815
 816 Set *internal_set_new(const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 817         return (Set*)            hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
 818 }
 819
 820 static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
 821                                          enum HashmapType type HASHMAP_DEBUG_PARAMS) {
 822         HashmapBase *q;
 823
 824         assert(h);
 825
 826         if (*h)
 827                 return 0;
 828
 829         q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
 830         if (!q)
 831                 return -ENOMEM;
 832
 833         *h = q;
 834         return 0;
 835 }
 836
 837 int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 838         return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
 839 }
 840
 841 int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 842         return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
 843 }
 844
 845 int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops  HASHMAP_DEBUG_PARAMS) {
 846         return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
 847 }
 848
 849 static void hashmap_free_no_clear(HashmapBase *h) {
 850         assert(!h->has_indirect);
 851         assert(h->n_direct_entries == 0);
 852
 853 #if ENABLE_DEBUG_HASHMAP
 854         assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
 855         LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
 856         assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
 857 #endif
 858
 859         if (h->from_pool) {
 860                 /* Ensure that the object didn't get migrated between threads. */
 861                 assert_se(is_main_thread());
 862                 mempool_free_tile(hashmap_type_info[h->type].mempool, h);
 863         } else
 864                 free(h);
 865 }
 866
 867 HashmapBase *internal_hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
 868         if (h) {
 869                 internal_hashmap_clear(h, default_free_key, default_free_value);
 870                 hashmap_free_no_clear(h);
 871         }
 872
 873         return NULL;
 874 }
 875
 876 void internal_hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
 877         free_func_t free_key, free_value;
 878         if (!h)
 879                 return;
 880
 881         free_key = h->hash_ops->free_key ?: default_free_key;
 882         free_value = h->hash_ops->free_value ?: default_free_value;
 883
 884         if (free_key || free_value) {
 885                 unsigned idx;
 886
 887                 for (idx = skip_free_buckets(h, 0); idx != IDX_NIL;
 888                      idx = skip_free_buckets(h, idx + 1)) {
 889                         struct hashmap_base_entry *e = bucket_at(h, idx);
 890
 891                         if (free_key)
 892                                 free_key((void *) e->key);
 893
 894                         if (free_value)
 895                                 free_value(entry_value(h, e));
 896                 }
 897         }
 898
 899         if (h->has_indirect) {
 900                 free(h->indirect.storage);
 901                 h->has_indirect = false;
 902         }
 903
 904         h->n_direct_entries = 0;
 905         reset_direct_storage(h);
 906
 907         if (h->type == HASHMAP_TYPE_ORDERED) {
 908                 OrderedHashmap *lh = (OrderedHashmap*) h;
 909                 lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
 910         }
 911
 912         base_set_dirty(h);
 913 }
 914
 915 static int resize_buckets(HashmapBase *h, unsigned entries_add);
 916
/*
 * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
 * Performs Robin Hood swaps as it goes. The entry to put must be placed
 * by the caller into swap slot IDX_PUT.
 * If used for in-place resizing, may leave a displaced entry in swap slot
 * IDX_PUT. Caller must rehash it next.
 *
 * h:    the hashmap to insert into.
 * idx:  bucket index where probing starts; 'distance' counts the probe steps
 *       taken from there and becomes the DIB stored for the placed entry.
 * swap: scratch area holding the IDX_PUT and IDX_TMP slots.
 *
 * The loop has no exit condition of its own: it only terminates when it hits
 * a bucket whose raw DIB is DIB_RAW_FREE or DIB_RAW_REHASH.
 *
 * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
 *          false otherwise.
 */
static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
                                   struct swap_entries *swap) {
        dib_raw_t raw_dib, *dibs;
        unsigned dib, distance;

#if ENABLE_DEBUG_HASHMAP
        h->debug.put_count++;
#endif

        dibs = dib_raw_ptr(h);

        for (distance = 0; ; distance++) {
                raw_dib = dibs[idx];
                if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
                        /* A bucket marked for rehashing still holds an entry:
                         * park it in IDX_TMP before overwriting the bucket. */
                        if (raw_dib == DIB_RAW_REHASH)
                                bucket_move_entry(h, swap, idx, IDX_TMP);

                        /* Keep the lowest-entry hint of indirect storage
                         * no higher than the bucket being filled. */
                        if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
                                h->indirect.idx_lowest_entry = idx;

                        bucket_set_dib(h, idx, distance);
                        bucket_move_entry(h, swap, IDX_PUT, idx);
                        if (raw_dib == DIB_RAW_REHASH) {
                                /* Hand the parked entry back to the caller in
                                 * IDX_PUT; it still needs a new home. */
                                bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
                                return true;
                        }

                        return false;
                }

                dib = bucket_calculate_dib(h, idx, raw_dib);

                if (dib < distance) {
                        /* Found a wealthier entry. Go Robin Hood! */
                        bucket_set_dib(h, idx, distance);

                        /* swap the entries */
                        bucket_move_entry(h, swap, idx, IDX_TMP);
                        bucket_move_entry(h, swap, IDX_PUT, idx);
                        bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);

                        /* Continue probing on behalf of the evicted entry,
                         * which was 'dib' steps away from its own start. */
                        distance = dib;
                }

                idx = next_idx(h, idx);
        }
}
 973
 974 /*
 975  * Puts an entry into a hashmap, boldly - no check whether key already exists.
 976  * The caller must place the entry (only its key and value, not link indexes)
 977  * in swap slot IDX_PUT.
 978  * Caller must ensure: the key does not exist yet in the hashmap.
 979  *                     that resize is not needed if !may_resize.
 980  * Returns: 1 if entry was put successfully.
 981  *          -ENOMEM if may_resize==true and resize failed with -ENOMEM.
 982  *          Cannot return -ENOMEM if !may_resize.
 983  */
 984 static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
 985                                    struct swap_entries *swap, bool may_resize) {
 986         struct ordered_hashmap_entry *new_entry;
 987         int r;
 988
 989         assert(idx < n_buckets(h));
 990
 991         new_entry = bucket_at_swap(swap, IDX_PUT);
 992
 993         if (may_resize) {
 994                 r = resize_buckets(h, 1);
 995                 if (r < 0)
 996                         return r;
 997                 if (r > 0)
 998                         idx = bucket_hash(h, new_entry->p.b.key);
 999         }
1000         assert(n_entries(h) < n_buckets(h));
1001
1002         if (h->type == HASHMAP_TYPE_ORDERED) {
1003                 OrderedHashmap *lh = (OrderedHashmap*) h;
1004
1005                 new_entry->iterate_next = IDX_NIL;
1006                 new_entry->iterate_previous = lh->iterate_list_tail;
1007
1008                 if (lh->iterate_list_tail != IDX_NIL) {
1009                         struct ordered_hashmap_entry *old_tail;
1010
1011                         old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
1012                         assert(old_tail->iterate_next == IDX_NIL);
1013                         old_tail->iterate_next = IDX_PUT;
1014                 }
1015
1016                 lh->iterate_list_tail = IDX_PUT;
1017                 if (lh->iterate_list_head == IDX_NIL)
1018                         lh->iterate_list_head = IDX_PUT;
1019         }
1020
1021         assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
1022
1023         n_entries_inc(h);
1024 #if ENABLE_DEBUG_HASHMAP
1025         h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
1026 #endif
1027
1028         base_set_dirty(h);
1029
1030         return 1;
1031 }
1032 #define hashmap_put_boldly(h, idx, swap, may_resize) \
1033         hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
1034
1035 /*
1036  * Returns 0 if resize is not needed.
1037  *         1 if successfully resized.
1038  *         -ENOMEM on allocation failure.
1039  */
1040 static int resize_buckets(HashmapBase *h, unsigned entries_add) {
1041         struct swap_entries swap;
1042         void *new_storage;
1043         dib_raw_t *old_dibs, *new_dibs;
1044         const struct hashmap_type_info *hi;
1045         unsigned idx, optimal_idx;
1046         unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
1047         uint8_t new_shift;
1048         bool rehash_next;
1049
1050         assert(h);
1051
1052         hi = &hashmap_type_info[h->type];
1053         new_n_entries = n_entries(h) + entries_add;
1054
1055         /* overflow? */
1056         if (_unlikely_(new_n_entries < entries_add))
1057                 return -ENOMEM;
1058
1059         /* For direct storage we allow 100% load, because it's tiny. */
1060         if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
1061                 return 0;
1062
1063         /*
1064          * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
1065          * From it follows: m = n + n/(INV_KEEP_FREE - 1)
1066          */
1067         new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
1068         /* overflow? */
1069         if (_unlikely_(new_n_buckets < new_n_entries))
1070                 return -ENOMEM;
1071
1072         if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
1073                 return -ENOMEM;
1074
1075         old_n_buckets = n_buckets(h);
1076
1077         if (_likely_(new_n_buckets <= old_n_buckets))
1078                 return 0;
1079
1080         new_shift = log2u_round_up(MAX(
1081                         new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
1082                         2 * sizeof(struct direct_storage)));
1083
1084         /* Realloc storage (buckets and DIB array). */
1085         new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
1086                               1U << new_shift);
1087         if (!new_storage)
1088                 return -ENOMEM;
1089
1090         /* Must upgrade direct to indirect storage. */
1091         if (!h->has_indirect) {
1092                 memcpy(new_storage, h->direct.storage,
1093                        old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
1094                 h->indirect.n_entries = h->n_direct_entries;
1095                 h->indirect.idx_lowest_entry = 0;
1096                 h->n_direct_entries = 0;
1097         }
1098
1099         /* Get a new hash key. If we've just upgraded to indirect storage,
1100          * allow reusing a previously generated key. It's still a different key
1101          * from the shared one that we used for direct storage. */
1102         get_hash_key(h->indirect.hash_key, !h->has_indirect);
1103
1104         h->has_indirect = true;
1105         h->indirect.storage = new_storage;
1106         h->indirect.n_buckets = (1U << new_shift) /
1107                                 (hi->entry_size + sizeof(dib_raw_t));
1108
1109         old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
1110         new_dibs = dib_raw_ptr(h);
1111
1112         /*
1113          * Move the DIB array to the new place, replacing valid DIB values with
1114          * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
1115          * Note: Overlap is not possible, because we have at least doubled the
1116          * number of buckets and dib_raw_t is smaller than any entry type.
1117          */
1118         for (idx = 0; idx < old_n_buckets; idx++) {
1119                 assert(old_dibs[idx] != DIB_RAW_REHASH);
1120                 new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
1121                                                               : DIB_RAW_REHASH;
1122         }
1123
1124         /* Zero the area of newly added entries (including the old DIB area) */
1125         memzero(bucket_at(h, old_n_buckets),
1126                (n_buckets(h) - old_n_buckets) * hi->entry_size);
1127
1128         /* The upper half of the new DIB array needs initialization */
1129         memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
1130                (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
1131
1132         /* Rehash entries that need it */
1133         n_rehashed = 0;
1134         for (idx = 0; idx < old_n_buckets; idx++) {
1135                 if (new_dibs[idx] != DIB_RAW_REHASH)
1136                         continue;
1137
1138                 optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
1139
1140                 /*
1141                  * Not much to do if by luck the entry hashes to its current
1142                  * location. Just set its DIB.
1143                  */
1144                 if (optimal_idx == idx) {
1145                         new_dibs[idx] = 0;
1146                         n_rehashed++;
1147                         continue;
1148                 }
1149
1150                 new_dibs[idx] = DIB_RAW_FREE;
1151                 bucket_move_entry(h, &swap, idx, IDX_PUT);
1152                 /* bucket_move_entry does not clear the source */
1153                 memzero(bucket_at(h, idx), hi->entry_size);
1154
1155                 do {
1156                         /*
1157                          * Find the new bucket for the current entry. This may make
1158                          * another entry homeless and load it into IDX_PUT.
1159                          */
1160                         rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
1161                         n_rehashed++;
1162
1163                         /* Did the current entry displace another one? */
1164                         if (rehash_next)
1165                                 optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
1166                 } while (rehash_next);
1167         }
1168
1169         assert(n_rehashed == n_entries(h));
1170
1171         return 1;
1172 }
1173
1174 /*
1175  * Finds an entry with a matching key
1176  * Returns: index of the found entry, or IDX_NIL if not found.
1177  */
1178 static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
1179         struct hashmap_base_entry *e;
1180         unsigned dib, distance;
1181         dib_raw_t *dibs = dib_raw_ptr(h);
1182
1183         assert(idx < n_buckets(h));
1184
1185         for (distance = 0; ; distance++) {
1186                 if (dibs[idx] == DIB_RAW_FREE)
1187                         return IDX_NIL;
1188
1189                 dib = bucket_calculate_dib(h, idx, dibs[idx]);
1190
1191                 if (dib < distance)
1192                         return IDX_NIL;
1193                 if (dib == distance) {
1194                         e = bucket_at(h, idx);
1195                         if (h->hash_ops->compare(e->key, key) == 0)
1196                                 return idx;
1197                 }
1198
1199                 idx = next_idx(h, idx);
1200         }
1201 }
1202 #define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
1203
1204 int hashmap_put(Hashmap *h, const void *key, void *value) {
1205         struct swap_entries swap;
1206         struct plain_hashmap_entry *e;
1207         unsigned hash, idx;
1208
1209         assert(h);
1210
1211         hash = bucket_hash(h, key);
1212         idx = bucket_scan(h, hash, key);
1213         if (idx != IDX_NIL) {
1214                 e = plain_bucket_at(h, idx);
1215                 if (e->value == value)
1216                         return 0;
1217                 return -EEXIST;
1218         }
1219
1220         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1221         e->b.key = key;
1222         e->value = value;
1223         return hashmap_put_boldly(h, hash, &swap, true);
1224 }
1225
1226 int set_put(Set *s, const void *key) {
1227         struct swap_entries swap;
1228         struct hashmap_base_entry *e;
1229         unsigned hash, idx;
1230
1231         assert(s);
1232
1233         hash = bucket_hash(s, key);
1234         idx = bucket_scan(s, hash, key);
1235         if (idx != IDX_NIL)
1236                 return 0;
1237
1238         e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1239         e->key = key;
1240         return hashmap_put_boldly(s, hash, &swap, true);
1241 }
1242
1243 int hashmap_replace(Hashmap *h, const void *key, void *value) {
1244         struct swap_entries swap;
1245         struct plain_hashmap_entry *e;
1246         unsigned hash, idx;
1247
1248         assert(h);
1249
1250         hash = bucket_hash(h, key);
1251         idx = bucket_scan(h, hash, key);
1252         if (idx != IDX_NIL) {
1253                 e = plain_bucket_at(h, idx);
1254 #if ENABLE_DEBUG_HASHMAP
1255                 /* Although the key is equal, the key pointer may have changed,
1256                  * and this would break our assumption for iterating. So count
1257                  * this operation as incompatible with iteration. */
1258                 if (e->b.key != key) {
1259                         h->b.debug.put_count++;
1260                         h->b.debug.rem_count++;
1261                         h->b.debug.last_rem_idx = idx;
1262                 }
1263 #endif
1264                 e->b.key = key;
1265                 e->value = value;
1266                 hashmap_set_dirty(h);
1267
1268                 return 0;
1269         }
1270
1271         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1272         e->b.key = key;
1273         e->value = value;
1274         return hashmap_put_boldly(h, hash, &swap, true);
1275 }
1276
1277 int hashmap_update(Hashmap *h, const void *key, void *value) {
1278         struct plain_hashmap_entry *e;
1279         unsigned hash, idx;
1280
1281         assert(h);
1282
1283         hash = bucket_hash(h, key);
1284         idx = bucket_scan(h, hash, key);
1285         if (idx == IDX_NIL)
1286                 return -ENOENT;
1287
1288         e = plain_bucket_at(h, idx);
1289         e->value = value;
1290         hashmap_set_dirty(h);
1291
1292         return 0;
1293 }
1294
1295 void *internal_hashmap_get(HashmapBase *h, const void *key) {
1296         struct hashmap_base_entry *e;
1297         unsigned hash, idx;
1298
1299         if (!h)
1300                 return NULL;
1301
1302         hash = bucket_hash(h, key);
1303         idx = bucket_scan(h, hash, key);
1304         if (idx == IDX_NIL)
1305                 return NULL;
1306
1307         e = bucket_at(h, idx);
1308         return entry_value(h, e);
1309 }
1310
1311 void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
1312         struct plain_hashmap_entry *e;
1313         unsigned hash, idx;
1314
1315         if (!h)
1316                 return NULL;
1317
1318         hash = bucket_hash(h, key);
1319         idx = bucket_scan(h, hash, key);
1320         if (idx == IDX_NIL)
1321                 return NULL;
1322
1323         e = plain_bucket_at(h, idx);
1324         if (key2)
1325                 *key2 = (void*) e->b.key;
1326
1327         return e->value;
1328 }
1329
1330 bool internal_hashmap_contains(HashmapBase *h, const void *key) {
1331         unsigned hash;
1332
1333         if (!h)
1334                 return false;
1335
1336         hash = bucket_hash(h, key);
1337         return bucket_scan(h, hash, key) != IDX_NIL;
1338 }
1339
1340 void *internal_hashmap_remove(HashmapBase *h, const void *key) {
1341         struct hashmap_base_entry *e;
1342         unsigned hash, idx;
1343         void *data;
1344
1345         if (!h)
1346                 return NULL;
1347
1348         hash = bucket_hash(h, key);
1349         idx = bucket_scan(h, hash, key);
1350         if (idx == IDX_NIL)
1351                 return NULL;
1352
1353         e = bucket_at(h, idx);
1354         data = entry_value(h, e);
1355         remove_entry(h, idx);
1356
1357         return data;
1358 }
1359
1360 void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
1361         struct plain_hashmap_entry *e;
1362         unsigned hash, idx;
1363         void *data;
1364
1365         if (!h) {
1366                 if (rkey)
1367                         *rkey = NULL;
1368                 return NULL;
1369         }
1370
1371         hash = bucket_hash(h, key);
1372         idx = bucket_scan(h, hash, key);
1373         if (idx == IDX_NIL) {
1374                 if (rkey)
1375                         *rkey = NULL;
1376                 return NULL;
1377         }
1378
1379         e = plain_bucket_at(h, idx);
1380         data = e->value;
1381         if (rkey)
1382                 *rkey = (void*) e->b.key;
1383
1384         remove_entry(h, idx);
1385
1386         return data;
1387 }
1388
1389 int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1390         struct swap_entries swap;
1391         struct plain_hashmap_entry *e;
1392         unsigned old_hash, new_hash, idx;
1393
1394         if (!h)
1395                 return -ENOENT;
1396
1397         old_hash = bucket_hash(h, old_key);
1398         idx = bucket_scan(h, old_hash, old_key);
1399         if (idx == IDX_NIL)
1400                 return -ENOENT;
1401
1402         new_hash = bucket_hash(h, new_key);
1403         if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
1404                 return -EEXIST;
1405
1406         remove_entry(h, idx);
1407
1408         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1409         e->b.key = new_key;
1410         e->value = value;
1411         assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1412
1413         return 0;
1414 }
1415
1416 int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
1417         struct swap_entries swap;
1418         struct hashmap_base_entry *e;
1419         unsigned old_hash, new_hash, idx;
1420
1421         if (!s)
1422                 return -ENOENT;
1423
1424         old_hash = bucket_hash(s, old_key);
1425         idx = bucket_scan(s, old_hash, old_key);
1426         if (idx == IDX_NIL)
1427                 return -ENOENT;
1428
1429         new_hash = bucket_hash(s, new_key);
1430         if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
1431                 return -EEXIST;
1432
1433         remove_entry(s, idx);
1434
1435         e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1436         e->key = new_key;
1437         assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
1438
1439         return 0;
1440 }
1441
1442 int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
1443         struct swap_entries swap;
1444         struct plain_hashmap_entry *e;
1445         unsigned old_hash, new_hash, idx_old, idx_new;
1446
1447         if (!h)
1448                 return -ENOENT;
1449
1450         old_hash = bucket_hash(h, old_key);
1451         idx_old = bucket_scan(h, old_hash, old_key);
1452         if (idx_old == IDX_NIL)
1453                 return -ENOENT;
1454
1455         old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
1456
1457         new_hash = bucket_hash(h, new_key);
1458         idx_new = bucket_scan(h, new_hash, new_key);
1459         if (idx_new != IDX_NIL)
1460                 if (idx_old != idx_new) {
1461                         remove_entry(h, idx_new);
1462                         /* Compensate for a possible backward shift. */
1463                         if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
1464                                 idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
1465                         assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
1466                 }
1467
1468         remove_entry(h, idx_old);
1469
1470         e = &bucket_at_swap(&swap, IDX_PUT)->p;
1471         e->b.key = new_key;
1472         e->value = value;
1473         assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
1474
1475         return 0;
1476 }
1477
1478 void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
1479         struct plain_hashmap_entry *e;
1480         unsigned hash, idx;
1481
1482         if (!h)
1483                 return NULL;
1484
1485         hash = bucket_hash(h, key);
1486         idx = bucket_scan(h, hash, key);
1487         if (idx == IDX_NIL)
1488                 return NULL;
1489
1490         e = plain_bucket_at(h, idx);
1491         if (e->value != value)
1492                 return NULL;
1493
1494         remove_entry(h, idx);
1495
1496         return value;
1497 }
1498
1499 static unsigned find_first_entry(HashmapBase *h) {
1500         Iterator i = ITERATOR_FIRST;
1501
1502         if (!h || !n_entries(h))
1503                 return IDX_NIL;
1504
1505         return hashmap_iterate_entry(h, &i);
1506 }
1507
1508 void *internal_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
1509         struct hashmap_base_entry *e;
1510         void *key, *data;
1511         unsigned idx;
1512
1513         idx = find_first_entry(h);
1514         if (idx == IDX_NIL)
1515                 return NULL;
1516
1517         e = bucket_at(h, idx);
1518         key = (void*) e->key;
1519         data = entry_value(h, e);
1520
1521         if (remove)
1522                 remove_entry(h, idx);
1523
1524         if (ret_key)
1525                 *ret_key = key;
1526
1527         return data;
1528 }
1529
1530 unsigned internal_hashmap_size(HashmapBase *h) {
1531
1532         if (!h)
1533                 return 0;
1534
1535         return n_entries(h);
1536 }
1537
1538 unsigned internal_hashmap_buckets(HashmapBase *h) {
1539
1540         if (!h)
1541                 return 0;
1542
1543         return n_buckets(h);
1544 }
1545
1546 int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
1547         Iterator i;
1548         unsigned idx;
1549
1550         assert(h);
1551
1552         HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1553                 struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
1554                 int r;
1555
1556                 r = hashmap_put(h, pe->b.key, pe->value);
1557                 if (r < 0 && r != -EEXIST)
1558                         return r;
1559         }
1560
1561         return 0;
1562 }
1563
1564 int set_merge(Set *s, Set *other) {
1565         Iterator i;
1566         unsigned idx;
1567
1568         assert(s);
1569
1570         HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
1571                 struct set_entry *se = set_bucket_at(other, idx);
1572                 int r;
1573
1574                 r = set_put(s, se->b.key);
1575                 if (r < 0)
1576                         return r;
1577         }
1578
1579         return 0;
1580 }
1581
1582 int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
1583         int r;
1584
1585         assert(h);
1586
1587         r = resize_buckets(h, entries_add);
1588         if (r < 0)
1589                 return r;
1590
1591         return 0;
1592 }
1593
1594 /*
1595  * The same as hashmap_merge(), but every new item from other is moved to h.
1596  * Keys already in h are skipped and stay in other.
1597  * Returns: 0 on success.
1598  *          -ENOMEM on alloc failure, in which case no move has been done.
1599  */
1600 int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
1601         struct swap_entries swap;
1602         struct hashmap_base_entry *e, *n;
1603         Iterator i;
1604         unsigned idx;
1605         int r;
1606
1607         assert(h);
1608
1609         if (!other)
1610                 return 0;
1611
1612         assert(other->type == h->type);
1613
1614         /*
1615          * This reserves buckets for the worst case, where none of other's
1616          * entries are yet present in h. This is preferable to risking
1617          * an allocation failure in the middle of the moving and having to
1618          * rollback or return a partial result.
1619          */
1620         r = resize_buckets(h, n_entries(other));
1621         if (r < 0)
1622                 return r;
1623
1624         HASHMAP_FOREACH_IDX(idx, other, i) {
1625                 unsigned h_hash;
1626
1627                 e = bucket_at(other, idx);
1628                 h_hash = bucket_hash(h, e->key);
1629                 if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
1630                         continue;
1631
1632                 n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1633                 n->key = e->key;
1634                 if (h->type != HASHMAP_TYPE_SET)
1635                         ((struct plain_hashmap_entry*) n)->value =
1636                                 ((struct plain_hashmap_entry*) e)->value;
1637                 assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
1638
1639                 remove_entry(other, idx);
1640         }
1641
1642         return 0;
1643 }
1644
1645 int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
1646         struct swap_entries swap;
1647         unsigned h_hash, other_hash, idx;
1648         struct hashmap_base_entry *e, *n;
1649         int r;
1650
1651         assert(h);
1652
1653         h_hash = bucket_hash(h, key);
1654         if (bucket_scan(h, h_hash, key) != IDX_NIL)
1655                 return -EEXIST;
1656
1657         if (!other)
1658                 return -ENOENT;
1659
1660         assert(other->type == h->type);
1661
1662         other_hash = bucket_hash(other, key);
1663         idx = bucket_scan(other, other_hash, key);
1664         if (idx == IDX_NIL)
1665                 return -ENOENT;
1666
1667         e = bucket_at(other, idx);
1668
1669         n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
1670         n->key = e->key;
1671         if (h->type != HASHMAP_TYPE_SET)
1672                 ((struct plain_hashmap_entry*) n)->value =
1673                         ((struct plain_hashmap_entry*) e)->value;
1674         r = hashmap_put_boldly(h, h_hash, &swap, true);
1675         if (r < 0)
1676                 return r;
1677
1678         remove_entry(other, idx);
1679         return 0;
1680 }
1681
1682 HashmapBase *internal_hashmap_copy(HashmapBase *h) {
1683         HashmapBase *copy;
1684         int r;
1685
1686         assert(h);
1687
1688         copy = hashmap_base_new(h->hash_ops, h->type  HASHMAP_DEBUG_SRC_ARGS);
1689         if (!copy)
1690                 return NULL;
1691
1692         switch (h->type) {
1693         case HASHMAP_TYPE_PLAIN:
1694         case HASHMAP_TYPE_ORDERED:
1695                 r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
1696                 break;
1697         case HASHMAP_TYPE_SET:
1698                 r = set_merge((Set*)copy, (Set*)h);
1699                 break;
1700         default:
1701                 assert_not_reached("Unknown hashmap type");
1702         }
1703
1704         if (r < 0) {
1705                 internal_hashmap_free(copy, false, false);
1706                 return NULL;
1707         }
1708
1709         return copy;
1710 }
1711
1712 char **internal_hashmap_get_strv(HashmapBase *h) {
1713         char **sv;
1714         Iterator i;
1715         unsigned idx, n;
1716
1717         sv = new(char*, n_entries(h)+1);
1718         if (!sv)
1719                 return NULL;
1720
1721         n = 0;
1722         HASHMAP_FOREACH_IDX(idx, h, i)
1723                 sv[n++] = entry_value(h, bucket_at(h, idx));
1724         sv[n] = NULL;
1725
1726         return sv;
1727 }
1728
1729 void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
1730         struct ordered_hashmap_entry *e;
1731         unsigned hash, idx;
1732
1733         if (!h)
1734                 return NULL;
1735
1736         hash = bucket_hash(h, key);
1737         idx = bucket_scan(h, hash, key);
1738         if (idx == IDX_NIL)
1739                 return NULL;
1740
1741         e = ordered_bucket_at(h, idx);
1742         if (e->iterate_next == IDX_NIL)
1743                 return NULL;
1744         return ordered_bucket_at(h, e->iterate_next)->p.value;
1745 }
1746
1747 int set_consume(Set *s, void *value) {
1748         int r;
1749
1750         assert(s);
1751         assert(value);
1752
1753         r = set_put(s, value);
1754         if (r <= 0)
1755                 free(value);
1756
1757         return r;
1758 }
1759
1760 int set_put_strdup(Set *s, const char *p) {
1761         char *c;
1762
1763         assert(s);
1764         assert(p);
1765
1766         if (set_contains(s, (char*) p))
1767                 return 0;
1768
1769         c = strdup(p);
1770         if (!c)
1771                 return -ENOMEM;
1772
1773         return set_consume(s, c);
1774 }
1775
1776 int set_put_strdupv(Set *s, char **l) {
1777         int n = 0, r;
1778         char **i;
1779
1780         assert(s);
1781
1782         STRV_FOREACH(i, l) {
1783                 r = set_put_strdup(s, *i);
1784                 if (r < 0)
1785                         return r;
1786
1787                 n += r;
1788         }
1789
1790         return n;
1791 }
1792
1793 int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
1794         const char *p = v;
1795         int r;
1796
1797         assert(s);
1798         assert(v);
1799
1800         for (;;) {
1801                 char *word;
1802
1803                 r = extract_first_word(&p, &word, separators, flags);
1804                 if (r <= 0)
1805                         return r;
1806
1807                 r = set_consume(s, word);
1808                 if (r < 0)
1809                         return r;
1810         }
1811 }
1812
1813 /* expand the cachemem if needed, return true if newly (re)activated. */
1814 static int cachemem_maintain(CacheMem *mem, unsigned size) {
1815         assert(mem);
1816
1817         if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) {
1818                 if (size > 0)
1819                         return -ENOMEM;
1820         }
1821
1822         if (!mem->active) {
1823                 mem->active = true;
1824                 return true;
1825         }
1826
1827         return false;
1828 }
1829
1830 int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
1831         bool sync_keys = false, sync_values = false;
1832         unsigned size;
1833         int r;
1834
1835         assert(cache);
1836         assert(cache->hashmap);
1837
1838         size = n_entries(cache->hashmap);
1839
1840         if (res_keys) {
1841                 r = cachemem_maintain(&cache->keys, size);
1842                 if (r < 0)
1843                         return r;
1844
1845                 sync_keys = r;
1846         } else
1847                 cache->keys.active = false;
1848
1849         if (res_values) {
1850                 r = cachemem_maintain(&cache->values, size);
1851                 if (r < 0)
1852                         return r;
1853
1854                 sync_values = r;
1855         } else
1856                 cache->values.active = false;
1857
1858         if (cache->hashmap->dirty) {
1859                 if (cache->keys.active)
1860                         sync_keys = true;
1861                 if (cache->values.active)
1862                         sync_values = true;
1863
1864                 cache->hashmap->dirty = false;
1865         }
1866
1867         if (sync_keys || sync_values) {
1868                 unsigned i, idx;
1869                 Iterator iter;
1870
1871                 i = 0;
1872                 HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
1873                         struct hashmap_base_entry *e;
1874
1875                         e = bucket_at(cache->hashmap, idx);
1876
1877                         if (sync_keys)
1878                                 cache->keys.ptr[i] = e->key;
1879                         if (sync_values)
1880                                 cache->values.ptr[i] = entry_value(cache->hashmap, e);
1881                         i++;
1882                 }
1883         }
1884
1885         if (res_keys)
1886                 *res_keys = cache->keys.ptr;
1887         if (res_values)
1888                 *res_values = cache->values.ptr;
1889         if (res_n_entries)
1890                 *res_n_entries = size;
1891
1892         return 0;
1893 }
1894
1895 IteratedCache *iterated_cache_free(IteratedCache *cache) {
1896         if (cache) {
1897                 free(cache->keys.ptr);
1898                 free(cache->values.ptr);
1899                 free(cache);
1900         }
1901
1902         return NULL;
1903 }