void *object;
};
+/* Structure holding parameters for get_partial_node_bulk() */
+struct partial_bulk_context {
+ gfp_t flags;
+ unsigned int min_objects;
+ unsigned int max_objects;
+ struct list_head slabs;
+};
+
static inline bool kmem_cache_debug(struct kmem_cache *s)
{
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
if (slab->freelist == old->freelist &&
slab->counters == old->counters) {
slab->freelist = new->freelist;
- slab->counters = new->counters;
+ /* prevent tearing for the read in get_partial_node_bulk() */
+ WRITE_ONCE(slab->counters, new->counters);
ret = true;
}
slab_unlock(slab);
stat(s, SHEAF_FREE);
}
-static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
- size_t size, void **p);
-
+static unsigned int
+__refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
+ unsigned int max);
static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
gfp_t gfp)
if (!to_fill)
return 0;
- filled = __kmem_cache_alloc_bulk(s, gfp, to_fill,
- &sheaf->objects[sheaf->size]);
+ filled = __refill_objects(s, &sheaf->objects[sheaf->size], gfp,
+ to_fill, to_fill);
sheaf->size += filled;
#endif
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
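+/*
+ * Bulk variant of get_partial_node(): detach slabs from the node's partial
+ * list until they collectively appear to hold at least pc->min_objects free
+ * objects (the counts are read racily), trying not to exceed pc->max_objects.
+ * Detached slabs are collected on pc->slabs for the caller to drain.
+ */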
+static bool get_partial_node_bulk(struct kmem_cache *s,
+ struct kmem_cache_node *n,
+ struct partial_bulk_context *pc)
+{
+ struct slab *slab, *slab2;
+ unsigned int total_free = 0;
+ unsigned long flags;
+
+ /* Racy check to avoid taking the lock unnecessarily. */
+ if (!n || data_race(!n->nr_partial))
+ return false;
+
+ INIT_LIST_HEAD(&pc->slabs);
+
+ spin_lock_irqsave(&n->list_lock, flags);
+
+ list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
+ struct freelist_counters flc;
+ unsigned int slab_free;
+
+ if (!pfmemalloc_match(slab, pc->flags))
+ continue;
+
+ /*
+ * Racily determine the number of free objects in the slab.
+ *
+ * slab_free is a lower bound due to possible subsequent
+ * concurrent freeing, so the caller may get more objects than
+ * requested and must handle that.
+ */
+ flc.counters = data_race(READ_ONCE(slab->counters));
+ slab_free = flc.objects - flc.inuse;
+
+ /* we already have min and this slab would get us over the max */
+ if (total_free >= pc->min_objects &&
+     total_free + slab_free > pc->max_objects)
+ break;
+
+ remove_partial(n, slab);
+
+ list_add(&slab->slab_list, &pc->slabs);
+
+ total_free += slab_free;
+ if (total_free >= pc->max_objects)
+ break;
+ }
+
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return total_free > 0;
+}
+
/*
* Try to allocate a partial slab from a specific node.
*/
return old.freelist;
}
+/*
+ * Get the slab's freelist and do not freeze the slab.
+ *
+ * Assumes the slab is isolated from the node's partial list and not frozen.
+ *
+ * Assumes this is performed only for caches without debugging, so we
+ * don't need to worry about adding the slab to the full list.
+ */
+static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *slab)
+{
+ struct freelist_counters old, new;
+
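+ /*
+ * Atomically take the whole freelist: replace it with NULL and mark
+ * all objects in use, retrying if a concurrent free changes the
+ * freelist or counters.
+ */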
+ do {
+ old.freelist = slab->freelist;
+ old.counters = slab->counters;
+
+ new.freelist = NULL;
+ new.counters = old.counters;
+ VM_WARN_ON_ONCE(new.frozen);
+
+ new.inuse = old.objects;
+
+ } while (!slab_update_freelist(s, slab, &old, &new, "get_freelist_nofreeze"));
+
+ return old.freelist;
+}
+
/*
* Freeze the partial slab and return the pointer to the freelist.
*/
return old.freelist;
}
+/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ *
+ * Note that we also wipe custom freelist pointers.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+ void *obj)
+{
+ if (unlikely(slab_want_init_on_free(s)) && obj &&
+ !freeptr_outside_object(s))
+ memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
+ 0, sizeof(void *));
+}
+
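+/*
+ * Grab up to @count objects from the freelist of a newly allocated slab and
+ * store them in @p. If free objects remain afterwards, the slab is added to
+ * the node's partial list; when spinning is not allowed and the list_lock
+ * cannot be taken by trylock, the slab is deferred for deactivation and 0 is
+ * returned.
+ */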
+static unsigned int alloc_from_new_slab(struct kmem_cache *s, struct slab *slab,
+ void **p, unsigned int count, bool allow_spin)
+{
+ unsigned int allocated = 0;
+ struct kmem_cache_node *n;
+ bool needs_add_partial;
+ unsigned long flags;
+ void *object;
+
+ /*
+ * Are we going to put the slab on the partial list?
+ * Note slab->inuse is 0 on a new slab.
+ */
+ needs_add_partial = (slab->objects > count);
+
+ if (!allow_spin && needs_add_partial) {
+
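+ /*
+ * We may not spin on the list_lock, so try to take it now and
+ * hold it until the slab has been added to the partial list.
+ */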
+ n = get_node(s, slab_nid(slab));
+
+ if (!spin_trylock_irqsave(&n->list_lock, flags)) {
+ /* Unlucky, discard newly allocated slab */
+ defer_deactivate_slab(slab, NULL);
+ return 0;
+ }
+ }
+
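+ /* Hand out objects from the slab's freelist one by one. */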
+ object = slab->freelist;
+ while (object && allocated < count) {
+ p[allocated] = object;
+ object = get_freepointer(s, object);
+ maybe_wipe_obj_freeptr(s, p[allocated]);
+
+ slab->inuse++;
+ allocated++;
+ }
+ slab->freelist = object;
+
+ if (needs_add_partial) {
+
+ if (allow_spin) {
+ n = get_node(s, slab_nid(slab));
+ spin_lock_irqsave(&n->list_lock, flags);
+ }
+ add_partial(n, slab, DEACTIVATE_TO_HEAD);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+
+ inc_slabs_node(s, slab_nid(slab), slab->objects);
+ return allocated;
+}
+
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
return object;
}
-/*
- * If the object has been wiped upon free, make sure it's fully initialized by
- * zeroing out freelist pointer.
- *
- * Note that we also wipe custom freelist pointers.
- */
-static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
- void *obj)
-{
- if (unlikely(slab_want_init_on_free(s)) && obj &&
- !freeptr_outside_object(s))
- memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
- 0, sizeof(void *));
-}
-
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
return ret;
}
+static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p);
+
/*
* returns a sheaf that has at least the requested size
* when prefilling is needed, do so with given gfp flags
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
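+/*
+ * Fill the array @p with at least @min and at most @max objects (best
+ * effort), first by draining slabs detached in bulk from the local node's
+ * partial list, then by allocating new slabs. Returns the number of objects
+ * provided, which may be below @min if allocations fail.
+ */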
+static unsigned int
+__refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
+ unsigned int max)
+{
+ struct partial_bulk_context pc;
+ struct slab *slab, *slab2;
+ unsigned int refilled = 0;
+ unsigned long flags;
+ void *object;
+ int node;
+
+ pc.flags = gfp;
+ pc.min_objects = min;
+ pc.max_objects = max;
+
+ node = numa_mem_id();
+
+ if (WARN_ON_ONCE(!gfpflags_allow_spinning(gfp)))
+ return 0;
+
+ /* TODO: consider also other nodes? */
+ if (!get_partial_node_bulk(s, get_node(s, node), &pc))
+ goto new_slab;
+
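+ /* Drain the freelists of the detached slabs into @p. */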
+ list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
+
+ list_del(&slab->slab_list);
+
+ object = get_freelist_nofreeze(s, slab);
+
+ while (object && refilled < max) {
+ p[refilled] = object;
+ object = get_freepointer(s, object);
+ maybe_wipe_obj_freeptr(s, p[refilled]);
+
+ refilled++;
+ }
+
+ /*
+ * The freelist had more objects than we can accommodate, so free
+ * the rest back. We can treat it like a detached freelist; we just
+ * need to find the tail object.
+ */
+ if (unlikely(object)) {
+ void *head = object;
+ void *tail;
+ int cnt = 0;
+
+ do {
+ tail = object;
+ cnt++;
+ object = get_freepointer(s, object);
+ } while (object);
+ do_slab_free(s, slab, head, tail, cnt, _RET_IP_);
+ }
+
+ if (refilled >= max)
+ break;
+ }
+
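+ /*
+ * We stopped draining early. Return the unprocessed slabs to the
+ * partial list, except for completely free ones when the list is
+ * already long enough.
+ */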
+ if (unlikely(!list_empty(&pc.slabs))) {
+ struct kmem_cache_node *n = get_node(s, node);
+
+ spin_lock_irqsave(&n->list_lock, flags);
+
+ list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
+
+ if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial))
+ continue;
+
+ list_del(&slab->slab_list);
+ add_partial(n, slab, DEACTIVATE_TO_HEAD);
+ }
+
+ spin_unlock_irqrestore(&n->list_lock, flags);
+
+ /* any slabs left are completely free and can be discarded */
+ list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
+
+ list_del(&slab->slab_list);
+ discard_slab(s, slab);
+ }
+ }
+
+ if (likely(refilled >= min))
+ goto out;
+
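+ /*
+ * Not enough objects obtained from partial slabs; allocate new slabs
+ * until we reach the minimum or the allocation fails.
+ */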
+new_slab:
+
+ slab = new_slab(s, pc.flags, node);
+ if (!slab)
+ goto out;
+
+ stat(s, ALLOC_SLAB);
+
+ /*
+ * TODO: possible optimization - if we know we will consume the whole
+ * slab we might skip creating the freelist?
+ */
+ refilled += alloc_from_new_slab(s, slab, p + refilled, max - refilled,
+ /* allow_spin = */ true);
+
+ if (refilled < min)
+ goto new_slab;
+out:
+
+ return refilled;
+}
+
static inline
int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)