*
* (c) 1998--2000 Martin Mares <mj@ucw.cz>
* (c) 2020 Maria Matejka <mq@jmq.cz>
+ * (c) 2025 Katerina Kubecova <katerina.kubecova@nic.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
* DOC: Slabs
*
* Slabs are collections of memory blocks of a fixed size.
- * They support very fast allocation and freeing of such blocks, prevent memory
+ *
+ * When the |DEBUGGING| switch is turned on, we automatically fill all
+ * newly allocated and freed blocks with special patterns to make it easier
+ * to detect use of uninitialized or already freed memory.
+ *
+ * Slabs support very fast allocation and freeing of such blocks, prevent memory
* fragmentation and optimize L2 cache usage. Slabs have been invented by Jeff Bonwick
* and published in USENIX proceedings as `The Slab Allocator: An Object-Caching Kernel
- * Memory Allocator'. Our implementation follows this article except that we don't use
- * constructors and destructors.
+ * Memory Allocator'. Our original implementation followed this article except that we
+ * didn't use constructors and destructors. By now, it has become a little bit more complicated.
*
- * When the |DEBUGGING| switch is turned on, we automatically fill all
- * newly allocated and freed blocks with a special pattern to make detection
- * of use of uninitialized or already freed memory easier.
+ * The slab allocates system memory pages and partitions them into the blocks.
+ * Every page has its head (struct sl_head) with some basic information,
+ * a bitfield for marking allocated blocks, and then the data, aligned to the
+ * maximum required alignment to avoid unaligned access.
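+ *
+ * A page thus looks roughly like this (a sketch, not to scale):
+ *
+ *   | struct sl_head | used_bits[] | padding | obj 0 | obj 1 | ... | obj N-1 |
+ *   |<------------ head_size ------------->|<-------- N * obj_size -------->|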
+ *
+ * To allocate a block, every thread has its own page (we call them heads, actually)
+ * assigned to allocate from. It finds a free block in the head, marks it and returns it.
+ *
+ * The thread may be unable to allocate from its own page because it's full. In such cases,
+ * the thread pushes that page to the full_heads list. Then, it needs a new one, which it gets
+ * primarily from the partial_heads list (see below), and if that list is empty, it requests
+ * a new page from the kernel.
+ *
+ * The threads' own heads are arranged in an array in struct slab.
+ *
+ * To free a block, we always know that it's allocated from a page which is
+ * aligned to its size. (That's an invariant we are enforcing in the page
+ * allocation subsystem and we heavily rely on that.) With that, we can calculate
+ * the head pointer from the block pointer by zeroing the least significant
+ * log2(page_size) bits (12 for the usual 4 kB pages).
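+ *
+ * A minimal sketch of that arithmetic (the real code uses the PAGE_HEAD()
+ * macro; page_size is assumed to be a power of two):
+ *
+ *   struct sl_head *h = (struct sl_head *) ((uintptr_t) block & ~((uintptr_t) page_size - 1));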
+ *
+ * With the head pointer in hand, we can unset the appropriate bit in the head.
+ * But there are several cases to consider with the head.
+ *
+ * (1) The head is some thread's head, indicated by slh_thread
+ * -> no need to do anything, the block is going to be reused soon
+ * (2) The head is in the partial_heads list and there are some more blocks
+ * in the head still allocated
+ * -> no need to do anything, the block is going to be reused (not so) soon
+ * but it's gonna be taken care of
+ * (3) The head is in the full_heads list and its state is slh_pre_partial.
+ * -> no need to do anything, the help is on the way already (see below why)
+ * (4) The head is in the partial_heads list and this is the last block to free.
+ * Note that there is no thread which could be allocating from this head
+ * right now. -> Removing the head from the list and freeing it safely
+ * is hard; we need to hire a specialist (schedule an event) to do it.
+ * (5) The head is in the full_heads list and its state is slh_full. This means
+ * that it is no longer full and we need to move it to the partial_heads list
+ * for a possible reuse. -> Removing the head from the list is hard. We
+ * need to hire a specialist (schedule an event) to do it. We also change
+ * the head's state to slh_pre_partial to indicate this fact.
+ *
+ * And that's all. Or is it?
+ *
+ * The Hired Specialist(TM) is an event doing the cleanup operations on the slab.
*
- * Example: Nodes of a FIB are allocated from a per-FIB Slab.
+ * (1) It walks over the full_heads list and:
+ * (1A) if the head is slh_full, it keeps it there,
+ * (1B) if the head is completely empty, it frees it,
+ * (1C) if the head is slh_pre_partial, it changes its state to slh_partial
+ * and moves it to another (local) list for further processing.
+ * (2) It exchanges the locally gathered partial list and the partial_heads list.
+ * (3) It walks over the local list (formerly partial_heads!) and:
+ * (3A) if the head still has some allocated blocks, it pushes it back to partial_heads,
+ * (3B) if the head is completely empty, it frees it.
+ *
+ * The last parts to mention are slab_dump and slab_memsize. But these are simple.
+ * They just walk over full_heads, partial_heads and the thread array, and
+ * dump or calculate the effective / overhead memory usage.
+ *
+ * And that's all. Or is it?
+ *
+ * Block allocation and freeing are lockless.
+ * (Are you scared already? We surely are. But we are brave.)
+ *
+ * Please read the comments in the sl_alloc and sl_free code to see the analysis
+ * of why this is actually safe.
+ *
+ * And don't worry. The Hired Specialist, slab_dump and slab_memsize all take
+ * the lock, therefore these can't collide with one another. Yet sl_alloc and
+ * sl_free may collide, both with each other and with the locked routines,
+ * and we cater for that.
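+ *
+ * A minimal usage sketch (the second argument is whatever event list the caller
+ * wants the cleanup event scheduled on; the names below are illustrative only):
+ *
+ *   slab *s = sl_new(my_pool, my_cleanup_list, sizeof(struct my_obj));
+ *   struct my_obj *o = sl_alloc(s);
+ *   ...
+ *   sl_free(o);
+ *   sl_delete(s);
+ *
+ * or simply free the whole pool, Slab being a resource like any other.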
*/
#include <stdlib.h>
#include "nest/bird.h"
#include "lib/resource.h"
-#include "lib/string.h"
-#include "lib/tlists.h"
+#include "lib/io-loop.h"
#ifdef DEBUGGING
static void slab_free(resource *r);
static void slab_dump(struct dump_request *dreq, resource *r);
-static resource *slab_lookup(resource *r, unsigned long addr);
+//static resource *slab_lookup(resource *r, unsigned long addr);
static struct resmem slab_memsize(resource *r);
+static void sl_cleanup(void *sp);
+static void sl_thread_end(struct bird_thread_end_callback *);
/*
- * Real efficient version.
+ * Head state life cycle
+ ***********************
+ *
+ * (new head) (all blocks allocated)
+ * Alloc page ---> slh_thread ---> slh_full
+ * ^ |
+ * | (some blocks freed)
+ * (pickup a head) |
+ * | v
+ * Free page <--- slh_partial <--- slh_pre_partial
+ * (cleanup: empty head) (cleanup: move between lists)
*/
-#define MAX_EMPTY_HEADS 1
-
enum sl_head_state {
- slh_empty = 2,
- slh_partial = 0,
- slh_full = 1,
+ slh_new = 0,
+ slh_thread = 1,
+ slh_full = 2,
+ slh_pre_partial = 3,
+ slh_partial = 4,
+ slh_dummy = 0xd0,
} PACKED;
struct sl_head {
struct slab *slab;
- TLIST_NODE(sl_head, struct sl_head) n;
- u16 num_full;
- enum sl_head_state state;
- u32 used_bits[0];
+ struct sl_head *_Atomic next;
+ _Atomic u16 num_full;
+ _Atomic enum sl_head_state state;
+ _Atomic u32 used_bits[0];
};
-#define TLIST_PREFIX sl_head
-#define TLIST_TYPE struct sl_head
-#define TLIST_ITEM n
-#define TLIST_WANT_WALK
-#define TLIST_WANT_ADD_HEAD
-#include "lib/tlists.h"
+/* These nodes must be the last nodes of full_heads and partial_heads linked lists, respectively.
+ * We need these for sanity checks and for detecting collisions of alloc/free and cleanup. */
+static struct sl_head slh_dummy_last_full = {
+ .state = slh_dummy,
+};
+static struct sl_head slh_dummy_last_partial = {
+ .state = slh_dummy,
+};
struct slab {
resource r;
uint obj_size, head_size, head_bitfield_len;
- uint objs_per_slab, num_empty_heads, data_size;
- struct sl_head_list empty_heads, partial_heads, full_heads;
+ uint objs_per_slab, data_size;
+ struct sl_head * _Atomic *threads_active_heads; /* Array of thread-own heads */
+  struct sl_head * _Atomic partial_heads;	/* Heads available for grabbing, list ended by &slh_dummy_last_partial */
+  struct sl_head * _Atomic full_heads;		/* Full heads, list ended by &slh_dummy_last_full */
+ event event_clean; /* Cleanup event (The Hired Specialist TM) */
+ struct event_list *cleanup_ev_list; /* Schedule event_clean here */
+ struct bird_thread_end_callback thread_end; /* Gets called on thread end */
};
static struct resclass sl_class = {
- "Slab",
- sizeof(struct slab),
- slab_free,
- slab_dump,
- slab_lookup,
- slab_memsize
+ .name = "Slab",
+ .size = sizeof(struct slab),
+ .free = slab_free,
+ .dump = slab_dump,
+ .memsize = slab_memsize,
};
#define SL_GET_HEAD(x) PAGE_HEAD(x)
+#define SL_GET_STATE(head)  atomic_load_explicit(&(head)->state, memory_order_acquire)
+#define SL_SET_STATE(head, expected_state, new_state) \
+  ASSERT_DIE(atomic_exchange_explicit(&(head)->state, new_state, memory_order_acq_rel) == expected_state)
+#define SL_MAYBE_SET_STATE(head, expected_state, new_state) \
+  ({ \
+    enum sl_head_state orig = expected_state; \
+    atomic_compare_exchange_strong_explicit(&(head)->state, &orig, new_state, \
+      memory_order_acq_rel, memory_order_acquire); \
+  })
-#define SL_HEAD_CHANGE_STATE(_s, _h, _from, _to) ({ \
- ASSERT_DIE(_h->state == slh_##_from); \
- sl_head_rem_node(&_s->_from##_heads, _h); \
- sl_head_add_head(&_s->_to##_heads, _h); \
- _h->state = slh_##_to; \
- })
+#if 0
+/* Please do not read this. This is an awful and awfully expensive way of debugging. */
+static void
+slab_asserts(struct slab *s)
+{
+  return;
+
+  struct sl_head *c = s->full_heads;
+  while (c != &slh_dummy_last_full)
+  {
+    ASSERT_DIE(c->slab == s);
+    enum sl_head_state state = SL_GET_STATE(c);
+    if (!(state == slh_full || state == slh_pre_partial))
+    {
+      struct sl_head *cc = s->full_heads;
+      while (cc != &slh_dummy_last_full)
+      {
+	state = SL_GET_STATE(cc);
+	log("cc %x state %i, s %x", cc, state, s);
+	if (!(state == slh_full || state == slh_pre_partial))
+	  bug("cc wrong full");
+	cc = cc->next;
+      }
+    }
+    c = c->next;
+  }
+
+  c = s->partial_heads;
+  if (c == &slh_dummy_last_partial)
+    log("no partial");
+  while (c != &slh_dummy_last_partial)
+  {
+    ASSERT_DIE(c->slab == s);
+    enum sl_head_state state = SL_GET_STATE(c);
+    log("cc %x state %i, s %x", c, state, s);
+    if (state != slh_partial)
+    {
+      bug("cc wrong part");
+      struct sl_head *cc = s->partial_heads;
+      while (cc != &slh_dummy_last_partial)
+      {
+	state = SL_GET_STATE(cc);
+	log("cc %x state %i, s %x", cc, state, s);
+	if (state != slh_partial)
+	  bug("cc wrong part");
+	cc = cc->next;
+      }
+    }
+    c = c->next;
+  }
+}
+#endif
/**
* sl_new - create a new Slab
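+ * @cleanup_ev_list: event list where the cleanup event (The Hired Specialist) gets scheduled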
* objects of size @size can be allocated.
*/
slab *
-sl_new(pool *p, uint size)
+sl_new(pool *p, struct event_list *cleanup_ev_list, uint size)
{
slab *s = ralloc(p, &sl_class);
+
+  /* We first have to calculate how big the allocated objects actually should
+   * be because of alignment constraints, and also the more objects, the bigger
+   * the bitfield has to be. */
+
+ /* First, round the size up to the alignment but keep the actual requested
+ * size for memory consumption reports. */
uint align = CPU_STRUCT_ALIGN;
s->data_size = size;
size = (size + align - 1) / align * align;
s->obj_size = size;
+ /* Calculate how many objects fit into a head. */
s->head_size = sizeof(struct sl_head);
do {
+    /* Try just dividing the space left after the head by the size of the aligned
+     * object and hope that the remainder gives us enough space to actually fit
+     * the bitmap. */
s->objs_per_slab = (page_size - s->head_size) / size;
s->head_bitfield_len = (s->objs_per_slab + 31) / 32;
s->head_size = (
+ sizeof(u32) * s->head_bitfield_len
+ align - 1)
/ align * align;
+
+ /* But if the overall size doesn't fit into the page, we are sure now
+ * that s->head_size is larger than at the beginning of the loop, thus
+ * s->objs_per_slab is going to decrease. After (at most) several iterations,
+ * this will converge. (Maria claims it, please believe her.) */
} while (s->objs_per_slab * size + s->head_size > (size_t) page_size);
+ /* But it may converge to zero which is kinda stupid because we want to
+ * allocate some blocks, not just juggle empty pages. But that's definitely
+ * the user's fault and we won't bother. */
if (!s->objs_per_slab)
bug("Slab: object too large");
- s->num_empty_heads = 0;
+
+ /* We need a block holding the active head pointer for every thread separately */
+ ASSERT_DIE(MAX_THREADS * sizeof (struct sl_head * _Atomic) <= (unsigned long) page_size);
+ void *page = alloc_page();
+ memset(page, 0, page_size);
+ s->threads_active_heads = page;
+
+ /* Initialize the partial_heads and full_heads lists by the dummy heads */
+ atomic_store_explicit(&s->partial_heads, &slh_dummy_last_partial, memory_order_relaxed);
+ atomic_store_explicit(&s->full_heads, &slh_dummy_last_full, memory_order_relaxed);
+
+ /* Initialize the cleanup routine */
+ s->event_clean = (event) {
+ .hook = sl_cleanup,
+ .data = s,
+ };
+
+ s->cleanup_ev_list = cleanup_ev_list;
+
+ /* Hook the thread end to get rid of active heads linked to that thread */
+ s->thread_end = (struct bird_thread_end_callback) {
+ .hook = sl_thread_end,
+ };
+ bird_thread_end_register(&s->thread_end);
return s;
}
* sl_delete - destroy an existing Slab
* @s: slab
*
- * This function destroys the given Slab.
+ * This function destroys the given Slab. It is just a public wrapper around rfree(),
+ * which in turn calls slab_free() internally.
*/
void sl_delete(slab *s)
{
}
/**
- * sl_alloc - allocate an object from Slab
+ * sl_alloc_from_page - allocate a block from the given slab page
* @s: slab
+ * @h: slab head (page)
*
- * sl_alloc() allocates space for a single object from the
- * Slab and returns a pointer to the object.
+ * Allocates and returns. May return NULL if the head is actually full, sorry. Deal with it.
*/
-void *
-sl_alloc(slab *s)
+static void *
+sl_alloc_from_page(slab *s, struct sl_head *h)
{
- struct sl_head *h;
- ASSERT_DIE(DG_IS_LOCKED(resource_parent(&s->r)->domain));
-
-redo:
- if (!(h = s->partial_heads.first))
- goto no_partial;
-okay:
- for (uint i=0; i<s->head_bitfield_len; i++)
- if (~h->used_bits[i])
+ ASSERT_DIE(SL_GET_STATE(h) == slh_thread);
+
+  /* This routine must never collide with itself. It's expected to run
+   * only on the head assigned to the current thread.
+   * A collision may still happen with sl_free(), though.
+   *
+   * If no object could be allocated, we return NULL. Yet, some block could
+   * nevertheless have been freed in the meantime. The caller is responsible
+   * for checking this and behaving appropriately. */
+
+  /* Looking for a zero bit in a variable-length, almost-atomic bitfield */
+ for (uint i = 0; i < s->head_bitfield_len; i++)
+ {
+ u32 used_bits = atomic_load_explicit(&h->used_bits[i], memory_order_acquire);
+ if (~used_bits)
{
- uint pos = u32_ctz(~h->used_bits[i]);
+ /* There are some zero bits in this part of the bitfield. */
+ uint pos = u32_ctz(~used_bits);
if (i * 32 + pos >= s->objs_per_slab)
- break;
+ /* But too far, we don't have those objects! */
+ return NULL;
+
+ /* Set the one, claim the block */
+      u32 check = atomic_fetch_or_explicit(&h->used_bits[i], (1U << pos), memory_order_acq_rel);
- h->used_bits[i] |= 1 << pos;
- h->num_full++;
+ ASSERT_DIE(!(check & (1 << pos))); /* Sanity check: nobody claimed the same block inbetween */
+ ASSERT_DIE(!(check & (~used_bits))); /* Sanity check: nobody claimed any other block inbetween */
+ /* Update allocation count */
+ atomic_fetch_add_explicit(&h->num_full, 1, memory_order_acquire);
+
+ /* Take the pointer and go away */
void *out = ((void *) h) + s->head_size + (i * 32 + pos) * s->obj_size;
#ifdef POISON
memset(out, 0xcd, s->data_size);
#endif
return out;
}
+ }
- SL_HEAD_CHANGE_STATE(s, h, partial, full);
- goto redo;
+ /* Everything full */
+ return NULL;
+}
-no_partial:
- if (h = s->empty_heads.first)
+static struct sl_head *
+sl_get_partial_head(struct slab *s)
+{
+ /* The cleanup must wait until we end */
+ rcu_read_lock();
+
+  /* Actually remove the first head */
+ struct sl_head *cur_head = atomic_load_explicit(&s->partial_heads, memory_order_acquire),
+ *new_partial;
+
+  /* This runs concurrently with adding heads to partial_heads (sl_cleanup).
+   * It is safe, because we only read partial_heads (the pointer is always valid, or at least the dummy),
+   * read its next pointer and do an atomic exchange.
+ *
+ * The exchange says -- we try to remove the first head which is cur_head,
+ * and we store cur_head->next as the new head. If it happened that somebody
+ * else has grabbed the head inbetween, we restart the process.
+ *
+ * Or the cleanup is running and it pushed a new head there.
+ *
+ * Well, a hypothetical problem.
+ *
+ * (1) thread A grabs cur_head, reads cur_head->next,
+ * and then gets scheduled out for a long long sleep
+ * (2) thread B picks cur_head successfully
+ * (3) thread B fills the head completely and pushes the head to full_heads
+ * (4) anybody frees something from the head
+ * (5) cleanup runs and pushes the head back to cur_head ...
+ *
+ * ... but it does not happen because the cleanup gets stuck, waiting for
+ * RCU to synchronize. And sooner or later, thread A finds out
+ * that it's screwed, it won't make any mess, and humbly takes another head.
+ */
+ do {
+ if (SL_GET_STATE(cur_head) == slh_dummy)
+ {
+ /* At the end */
+ ASSERT_DIE(cur_head == &slh_dummy_last_partial);
+ break;
+ }
+ else
{
- SL_HEAD_CHANGE_STATE(s, h, empty, partial);
- s->num_empty_heads--;
- goto okay;
+ /* Another partial found */
+ new_partial = atomic_load_explicit(&cur_head->next, memory_order_acquire);
+ ASSERT_DIE(new_partial != NULL);
}
+ } while (!atomic_compare_exchange_strong_explicit(
+ &s->partial_heads, &cur_head, new_partial,
+ memory_order_acq_rel, memory_order_acquire));
- h = alloc_page();
- ASSERT_DIE(SL_GET_HEAD(h) == h);
+ /* Indicate that the head now belongs to a thread */
+ if (cur_head != &slh_dummy_last_partial)
+ SL_SET_STATE(cur_head, slh_partial, slh_thread);
+
+ /* The next pointer of cur_head is not changed here. We keep it for counting and dumping memory */
+
+ /* Out of critical section, now the cleanup may continue */
+ rcu_read_unlock();
+
+ if (cur_head == &slh_dummy_last_partial)
+ return NULL;
+ else
+ return cur_head;
+}
+
+/**
+ * sl_alloc - allocate an object from Slab
+ * @s: slab
+ *
+ * sl_alloc() allocates space for a single object from the
+ * Slab and returns a pointer to the object.
+ */
+void *
+sl_alloc(slab *s)
+{
+ struct sl_head *h = NULL;
+
+ /* Try to use head owned by this thread */
+ if (h = atomic_load_explicit(&s->threads_active_heads[THIS_THREAD_ID], memory_order_relaxed))
+ {
+ void *ret = sl_alloc_from_page(s, h);
+
+ if (ret)
+ return ret;
+
+    /* This thread has a head, but it is already full, so put the head to full heads.
+     * We did not put the head to full heads right after we used up the last space,
+     * because someone might have freed some of our space since then. It may have been us,
+     * actually, as in many cases these allocations end up being released quite soon. */
+ atomic_store_explicit(&s->threads_active_heads[THIS_THREAD_ID], NULL, memory_order_relaxed);
+
+ /* First of all, we mark the head as being full, not belonging to a thread.
+ * This creates a window of race conditions with sl_free() where we still think
+ * that the head is full but in the meantime the head may become even completely
+ * empty.
+ *
+ * There is no other race condition for now, as the cleanup routine can not see
+ * this head yet, and no other thread may pick it from the partial heads. Remember,
+ * it's not in full_heads yet, how could it get to partials? */
+ SL_SET_STATE(h, slh_thread, slh_full);
+
+ /* We may want to detect the race condition here. In some extremely rare cases,
+     * the complete free race may have already happened by now, and in such a case,
+     * nobody would have scheduled the cleanup for it. But remember, this is allocation.
+ * There is definitely going to be some cleanup in the future anyway.
+ *
+ * So we don't worry and just go ahead, the cleanup routine will take care.
+ *
+ * Put the head to full heads linked list.
+ *
+ * The head->next pointer was intentionally kept set when grabbed from partial heads.
+ * It makes it much easier to dump and count memory, yet we can't now
+ * assert it to be NULL. */
+ struct sl_head *next = atomic_load_explicit(&s->full_heads, memory_order_acquire);
+ do atomic_store_explicit(&h->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &s->full_heads, &next, h,
+ memory_order_acq_rel, memory_order_acquire));
+
+ /* After putting the head into full_heads, we can't even expect that it exists anymore.
+ * DO NOT TOUCH IT! */
+ }
+
+ /* This thread has no page head. Try to get one from partial heads */
+ h = sl_get_partial_head(s);
+ if (!h)
+ {
+ /* There are no partial heads, we need to allocate a new page */
+ h = alloc_page();
+ ASSERT_DIE(SL_GET_HEAD(h) == h);
#ifdef POISON
- memset(h, 0xba, page_size);
+ memset(h, 0xba, page_size);
#endif
- memset(h, 0, s->head_size);
- h->slab = s;
- sl_head_add_head(&s->partial_heads, h);
- goto okay;
+ memset(h, 0, s->head_size);
+ h->slab = s;
+ atomic_store_explicit(&h->state, slh_thread, memory_order_relaxed);
+ }
+ ASSERT_DIE(h->slab == s);
+
+ atomic_store_explicit(&s->threads_active_heads[THIS_THREAD_ID], h, memory_order_relaxed);
+ void *ret = sl_alloc_from_page(s, h);
+ ASSERT_DIE(ret); /* Since the head is new or partial, there must be a space for allocation. */
+ return ret;
}
/**
return obj;
}
+static void
+sl_free_page(struct sl_head *h)
+{
+#ifdef POISON
+ memset(h, 0xde, page_size);
+#endif
+ free_page(h);
+}
+
+/* Cleaning of a slab consists of two parts. This is the Hired Specialist(TM) mentioned
+ * in the algorithm overview.
+ *
+ * First, we walk over full_heads and find all heads with free blocks.
+ * These are put to a new_partials list, or if the head is completely empty,
+ * it's freed immediately.
+ *
+ * This function does this part.
+ */
+static struct sl_head *
+sl_cleanup_full_heads(struct slab *s)
+{
+ /* Prepare the end of the new partial list */
+ struct sl_head *new_partials = &slh_dummy_last_partial;
+
+ /* The topmost full head is ignored to avoid collisions with allocations.
+ * This may cause a little bit of inefficiency but we don't care so much. */
+ struct sl_head *fh = atomic_load_explicit(&s->full_heads, memory_order_acquire);
+
+  /* The topmost head is never NULL; it is always either a valid head or &slh_dummy_last_full. */
+ ASSERT_DIE(fh);
+ struct sl_head *next = atomic_load_explicit(&fh->next, memory_order_relaxed);
+
+  /* Avoid possible problems with very rare race conditions with sl_get_partial_head():
+   * basically, wait for everybody who may still have a pointer to any of these heads
+   * to finish. */
+ synchronize_rcu();
+
+ while (next && (SL_GET_STATE(next) != slh_dummy))
+ {
+ /* We need to store the next_next pointer now in case we free the page */
+ struct sl_head *next_next = atomic_load_explicit(&next->next, memory_order_relaxed);
+
+    /* Find out how many blocks are allocated from this slab head.
+     *
+     * Transitions between the three variants below are covered in sl_free(),
+     * so if we pick the wrong variant now, somebody is already scheduling
+     * the cleanup routine again. */
+ u16 num_full = atomic_load_explicit(&next->num_full, memory_order_acquire);
+ if (num_full == 0)
+ {
+ /* Already completely empty! */
+
+ /* Remove head from the list */
+ ASSERT_DIE(atomic_exchange_explicit(&fh->next, next_next, memory_order_acq_rel) == next);
+
+ /* Free the page completely */
+ sl_free_page(next);
+ }
+ else if (num_full < s->objs_per_slab)
+ {
+ /* Somebody freed some blocks from here. */
+
+ /* Remove head from the list */
+ ASSERT_DIE(atomic_exchange_explicit(&fh->next, next_next, memory_order_acq_rel) == next);
+
+ /* We change the head's state to slh_partial to indicate where it is intended to be stored. */
+ SL_SET_STATE(next, slh_full, slh_partial);
+
+ /* Put the head into new_partials */
+ atomic_store_explicit(&next->next, new_partials, memory_order_relaxed);
+ new_partials = next;
+ }
+ else
+ {
+      /* This head is kept here. It's still full. */
+ ASSERT_DIE(num_full == s->objs_per_slab);
+ fh = next;
+ }
+
+ /* Next head, let's go! */
+ next = next_next;
+ }
+
+ return new_partials;
+}
+
+/* Slab cleanup, second part. The Hired Specialist(TM) still on the scene.
+ *
+ * Here partial_heads are cleaned. Since other threads may remove heads from partial_heads,
+ * the original partial_heads linked list is first replaced by the "new_partials" linked list
+ * and then worked on.
+ *
+ * Empty heads are freed and the rest is then put back to partial_heads one by one
+ * to ensure other threads always have as many partial heads as possible up for grabs.
+ *
+ * The swap at the beginning might collide with another thread grabbing a head from partial_heads,
+ * hence the swap is followed by synchronize_rcu() to wait for everybody who may still hold
+ * a pointer into the detached list (see rcu_read_lock() in sl_get_partial_head()).
+ */
+static void
+sl_cleanup_partial_heads(struct slab *s, struct sl_head *new_partials)
+{
+ /* Exchange the partial heads for the supplied list */
+ struct sl_head *ph = atomic_exchange_explicit(&s->partial_heads, new_partials, memory_order_acq_rel);
+ ASSERT_DIE(ph);
+
+ /* Wait for readers to realize */
+ synchronize_rcu();
+
+ /* Now nobody else sees ph and we can happily free anything we come across. Almost.
+ * And we can walk over the list and do the cleanup in peace. */
+ while (ph != &slh_dummy_last_partial)
+ {
+ ASSERT_DIE(SL_GET_STATE(ph) == slh_partial);
+ struct sl_head *next_head = atomic_load_explicit(&ph->next, memory_order_relaxed);
+ ASSERT_DIE(next_head);
+
+ if (!atomic_load_explicit(&ph->num_full, memory_order_relaxed))
+ /* The head is empty, free it. */
+ sl_free_page(ph);
+ else
+ {
+ /* Insert the head into the partial heads list.
+ * This runs concurrently with removing heads from partial_heads (sl_get_partial_head),
+ * but we are the only one pushing heads there, so any pointer we see there is unique
+ * and no heads are going to be recycled during the race condition.
+ *
+ * Thus, we can't run into the ominous race condition of colliding with both
+ * addition and removal at the same time. At least by unanimous voting of two people,
+ * we consider this safe.
+ *
+ * No, seriously. The only weird case is that sl_get_partial_head picks a head,
+ * then we push another one, then another sl_get_partial_head picks a head,
+ * then we push another one ... but in the end, they either find out that this
+ * is not the topmost one, or they serialize in the right order and everything works. */
+ struct sl_head *head = atomic_load_explicit(&s->partial_heads, memory_order_acquire);
+ do atomic_store_explicit(&ph->next, head, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &s->partial_heads, &head, ph,
+ memory_order_acq_rel, memory_order_acquire));
+ }
+ ph = next_head;
+ }
+}
+
+static void
+sl_cleanup(void *sp)
+{
+ struct slab *s = (struct slab*) sp;
+
+ /* Cleanup does weird things and should therefore not collide
+ * with memsize and dump calls. We need to lock the pool's domain explicitly. */
+ struct domain_generic *dom = resource_parent(&s->r)->domain;
+ int locking = !DG_IS_LOCKED(dom);
+ if (locking)
+ DG_LOCK(dom);
+
+ /* Get the heads transitioning from full to partial */
+ struct sl_head *new_partials = sl_cleanup_full_heads(s);
+
+ /* And merge them with partials */
+ sl_cleanup_partial_heads(s, new_partials);
+
+ /* If we were locking, we have to unlock! */
+ if (locking)
+ DG_UNLOCK(dom);
+}
+
+static void sl_thread_end(struct bird_thread_end_callback *btec)
+{
+ SKIP_BACK_DECLARE(slab, s, thread_end, btec);
+
+ /* Getting rid of an active head of a stopping thread.
+ * We first pick the head from its place. */
+ struct sl_head *h = atomic_load_explicit(&s->threads_active_heads[THIS_THREAD_ID], memory_order_relaxed);
+ atomic_store_explicit(&s->threads_active_heads[THIS_THREAD_ID], NULL, memory_order_relaxed);
+
+ /* No such head, yay! */
+ if (h == NULL)
+ return;
+
+ /* How many items are still allocated from that head? */
+ uint num_full = atomic_load_explicit(&h->num_full, memory_order_acquire);
+ if (num_full == 0)
+ /* The page is empty, just throw it away */
+ sl_free_page(h);
+
+ else
+ {
+ /* There are some, let's put the head into the full heads list */
+ SL_SET_STATE(h, slh_thread, slh_full);
+
+ /* Put the head to full heads linked list */
+ struct sl_head *next = atomic_load_explicit(&s->full_heads, memory_order_acquire);
+ do atomic_store_explicit(&h->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &s->full_heads, &next, h,
+ memory_order_acq_rel, memory_order_acquire));
+
+ /* And if it actually should be partial, the cleanup will take care */
+ if (num_full < s->objs_per_slab)
+ ev_send(s->cleanup_ev_list, &s->event_clean);
+ }
+}
+
+
/**
* sl_free - return a free object back to a Slab
* @s: slab
{
struct sl_head *h = SL_GET_HEAD(oo);
struct slab *s = h->slab;
- ASSERT_DIE(DG_IS_LOCKED(resource_parent(&s->r)->domain));
#ifdef POISON
memset(oo, 0xdb, s->data_size);
#endif
+  /* Find the position of the object in the page */
uint offset = oo - ((void *) h) - s->head_size;
ASSERT_DIE(offset % s->obj_size == 0);
uint pos = offset / s->obj_size;
ASSERT_DIE(pos < s->objs_per_slab);
- h->used_bits[pos / 32] &= ~(1 << (pos % 32));
+  /* Clear the corresponding bit in the bitfield */
+  atomic_fetch_and_explicit(&h->used_bits[pos / 32], ~(1U << (pos % 32)), memory_order_acq_rel);
- if ((h->num_full-- == s->objs_per_slab) && (h->state == slh_full))
- SL_HEAD_CHANGE_STATE(s, h, full, partial);
- else if (!h->num_full)
- {
- sl_head_rem_node(&s->partial_heads, h);
- if (s->num_empty_heads >= MAX_EMPTY_HEADS)
- {
-#ifdef POISON
- memset(h, 0xde, page_size);
-#endif
- free_page(h);
- }
- else
- {
- sl_head_add_head(&s->empty_heads, h);
- h->state = slh_empty;
- s->num_empty_heads++;
- }
- }
+ u16 num_full_before = atomic_fetch_sub_explicit(&h->num_full, 1, memory_order_acq_rel);
+
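+  /* Schedule the cleanup when this free either ended a fully allocated head
+   * (it should move to partial_heads for possible reuse) or emptied the head
+   * completely (it may be freed). In all other cases nothing needs to happen
+   * right now; a spurious wakeup, e.g. for a head still owned by a thread,
+   * is harmless. */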
+ if ((num_full_before == s->objs_per_slab) || (num_full_before == 1))
+ ev_send(s->cleanup_ev_list, &s->event_clean);
}
static void
slab_free(resource *r)
{
+  /* At this point, only one thread is expected to be manipulating the slab */
slab *s = (slab *) r;
- WALK_TLIST_DELSAFE(sl_head, h, &s->empty_heads)
- free_page(h);
- WALK_TLIST_DELSAFE(sl_head, h, &s->partial_heads)
- free_page(h);
- WALK_TLIST_DELSAFE(sl_head, h, &s->full_heads)
- free_page(h);
+  /* No more thread ends are relevant; we are ending anyway */
+ bird_thread_end_unregister(&s->thread_end);
+
+ /* Free partial heads */
+ struct sl_head *h = atomic_load_explicit(&s->partial_heads, memory_order_relaxed);
+ while (SL_GET_STATE(h) != slh_dummy)
+ {
+ struct sl_head *nh = atomic_load_explicit(&h->next, memory_order_relaxed);
+ sl_free_page(h);
+ h = nh;
+ }
+ atomic_store_explicit(&s->partial_heads, &slh_dummy_last_partial, memory_order_relaxed);
+
+ /* Free full heads */
+ h = atomic_load_explicit(&s->full_heads, memory_order_relaxed);
+ while (SL_GET_STATE(h) != slh_dummy)
+ {
+ struct sl_head *nh = atomic_load_explicit(&h->next, memory_order_relaxed);
+ sl_free_page(h);
+ h = nh;
+ }
+ atomic_store_explicit(&s->full_heads, &slh_dummy_last_full, memory_order_relaxed);
+
+ /* Free thread heads */
+ for (long unsigned int i = 0; i < page_size / (sizeof(struct sl_head *_Atomic)); i++)
+ {
+ struct sl_head *th = atomic_load_explicit(&s->threads_active_heads[i], memory_order_relaxed);
+ if (th)
+ sl_free_page(th);
+ }
}
static void
slab_dump(struct dump_request *dreq, resource *r)
{
+ /* This is expected to run from the same loop as sl_cleanup */
slab *s = (slab *) r;
-  int ec=0, pc=0, fc=0;
+  int tc=0, pc=0, fc=0;
RDUMP("(%d objs per %d bytes in page)\n",
s->objs_per_slab, s->obj_size);
- RDUMP("%*sempty:\n", dreq->indent+3, "");
- WALK_TLIST(sl_head, h, &s->empty_heads)
+ /* Dump threads */
+ RDUMP("%*sthreads:\n", dreq->indent+3, "");
+ for (long unsigned int i = 0; i < (page_size / sizeof(struct sl_head * _Atomic)); i++)
{
- RDUMP("%*s%p\n", dreq->indent+6, "", h);
- ec++;
+ struct sl_head *th = atomic_load_explicit(&s->threads_active_heads[i], memory_order_relaxed);
+ if (th)
+ {
+ /* There is no guarantee the head remains slh_thread, but it won't be freed. */
+ RDUMP("%*s%p (", dreq->indent+6, "", th);
+      for (uint j=1; j<=s->head_bitfield_len; j++)
+	RDUMP("%08x", atomic_load_explicit(&th->used_bits[s->head_bitfield_len-j], memory_order_relaxed));
+      RDUMP(")\n");
+      tc++;
+ }
}
- RDUMP("%*spartial:\n", dreq->indent+3, "");
- WALK_TLIST(sl_head, h, &s->partial_heads)
+ /* Dump full heads */
+ RDUMP("%*sfull:\n", dreq->indent+3, "");
+ struct sl_head *h = atomic_load_explicit(&s->full_heads, memory_order_relaxed);
+  while (h != &slh_dummy_last_full)
{
RDUMP("%*s%p (", dreq->indent+6, "", h);
for (uint i=1; i<=s->head_bitfield_len; i++)
- RDUMP("%08x", h->used_bits[s->head_bitfield_len-i]);
+ RDUMP("%08x", atomic_load_explicit(&h->used_bits[s->head_bitfield_len-i], memory_order_relaxed));
RDUMP(")\n");
+    fc++;
+ h = atomic_load_explicit(&h->next, memory_order_relaxed);
}
- RDUMP("%*sfull:\n", dreq->indent+3, "");
- WALK_TLIST(sl_head, h, &s->full_heads)
+ /* Dump partial heads */
+ RDUMP("%*spartial:\n", dreq->indent+3, "");
+ h = atomic_load_explicit(&s->partial_heads, memory_order_relaxed);
+  while (h != &slh_dummy_last_partial)
{
- RDUMP("%*s%p\n", dreq->indent+6, "", h);
- fc++;
+ RDUMP("%*s%p (", dreq->indent+6, "", h);
+ for (uint i=1; i<=s->head_bitfield_len; i++)
+ RDUMP("%08x", atomic_load_explicit(&h->used_bits[s->head_bitfield_len-i], memory_order_relaxed));
+ RDUMP(")\n");
+ pc++;
+
+ h = atomic_load_explicit(&h->next, memory_order_relaxed);
+ enum sl_head_state a = SL_GET_STATE(h);
+
+    if (a != slh_partial && a != slh_dummy)
+      /* This is ugly. A head may have changed its state, but it could not disappear.
+       * The next pointer is never nulled or made invalid. If the head has changed
+       * its state, it must be because it was grabbed from the partial_heads linked list.
+       * That is why we can be sure the partial_heads linked list now contains only
+       * heads we have not yet seen in this loop, so we restart from its top. */
+      h = atomic_load_explicit(&s->partial_heads, memory_order_relaxed);
}
- RDUMP("%*sempty=%d partial=%d total=%d\n", dreq->indent+3, "", ec, pc, fc);
+
+ RDUMP("%*spartial=%d full=%d total=%d\n", dreq->indent+3, "", ec, pc, fc);
}
static struct resmem
slab *s = (slab *) r;
size_t heads = 0;
- WALK_TLIST(sl_head, h, &s->full_heads)
- heads++;
-
-  size_t items = heads * s->objs_per_slab;
+  size_t items = 0;
- WALK_TLIST(sl_head, h, &s->partial_heads)
+  /* Full heads memsize */
+ struct sl_head *h = atomic_load_explicit(&s->full_heads, memory_order_relaxed);
+  while (h != &slh_dummy_last_full)
{
heads++;
- items += h->num_full;
+ items += atomic_load_explicit(&h->num_full, memory_order_relaxed);
+ h = atomic_load_explicit(&h->next, memory_order_relaxed);
}
- WALK_TLIST(sl_head, h, &s->empty_heads)
+ /* Partial heads memsize */
+ h = atomic_load_explicit(&s->partial_heads, memory_order_relaxed);
+  while (h != &slh_dummy_last_partial)
+ {
heads++;
+ items += atomic_load_explicit(&h->num_full, memory_order_relaxed);
+
+ h = atomic_load_explicit(&h->next, memory_order_relaxed);
+ enum sl_head_state a = SL_GET_STATE(h);
+
+    if (a != slh_partial && a != slh_dummy)
+      /* This is ugly. A head may have changed its state, but it could not disappear.
+       * The next pointer is never nulled or made invalid. If the head has changed
+       * its state, it must be because it was grabbed from the partial_heads linked list.
+       * That is why we can be sure the partial_heads linked list now contains only
+       * heads we have not yet seen in this loop, so we restart from its top. */
+      h = atomic_load_explicit(&s->partial_heads, memory_order_relaxed);
+ }
+
+ /* Thread heads memsize */
+ for (long unsigned int i = 0; i < (page_size / sizeof(struct sl_head * _Atomic)); i++)
+ {
+    struct sl_head *th = atomic_load_explicit(&s->threads_active_heads[i], memory_order_relaxed);
+    if (th)
+    {
+      items += atomic_load_explicit(&th->num_full, memory_order_relaxed);
+ heads++;
+ }
+ }
size_t eff = items * s->data_size;
};
}
+#if 0
+/* The lookup function is almost impossible to write well and actually
+ * we should look for different methods of debugging; this is too clumsy.
+ * Probably an extension for GDB or so. --Maria */
static resource *
slab_lookup(resource *r, unsigned long a)
{
slab *s = (slab *) r;
- WALK_TLIST(sl_head, h, &s->partial_heads)
+ struct sl_head *h = s->full_heads;
+  while (h != &slh_dummy_last_full)
+ {
if ((unsigned long) h < a && (unsigned long) h + page_size < a)
return r;
- WALK_TLIST(sl_head, h, &s->full_heads)
+ h = h->next;
+ }
+
+ h = s->partial_heads;
+  while (h != &slh_dummy_last_partial)
+ {
if ((unsigned long) h < a && (unsigned long) h + page_size < a)
return r;
+ h = h->next;
+ }
+
+  for (long unsigned int i = 0; i < (page_size / sizeof(struct sl_head * _Atomic)); i++)
+  {
+    h = s->threads_active_heads[i];
+    if (h && (unsigned long) h < a && (unsigned long) h + page_size < a)
+      return r;
+  }
return NULL;
}
+#endif