#define PERCPU_FREE_TARGET (4)
#define PERCPU_NR_SCANS PERCPU_FREE_TARGET
-/* Helpers to get the local list index */
-#define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET)
-#define LOCAL_FREE_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
-#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
-/* Local list helpers */
-static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
-{
- return &loc_l->lists[LOCAL_FREE_LIST_IDX];
-}
-
-static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
-{
- return &loc_l->lists[LOCAL_PENDING_LIST_IDX];
-}
-
/* bpf_lru_node helpers */
static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
{
bpf_lru_list_count_dec(l, node->type);
node->type = tgt_free_type;
+ WRITE_ONCE(node->pending_free, 0);
list_move(&node->list, free_list);
}
bpf_lru_list_count_inc(l, tgt_type);
node->type = tgt_type;
bpf_lru_node_clear_ref(node);
+ /* Reset pending_free only when moving to the free list */
+ if (tgt_type == BPF_LRU_LIST_T_FREE)
+ WRITE_ONCE(node->pending_free, 0);
list_move(&node->list, &l->lists[tgt_type]);
}
unsigned int i = 0;
list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
- if (bpf_lru_node_is_ref(node)) {
+ if (bpf_lru_node_is_ref(node) &&
+ !READ_ONCE(node->pending_free)) {
__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
- } else if (lru->del_from_htab(lru->del_arg, node)) {
+ } else if (READ_ONCE(node->pending_free) ||
+ lru->del_from_htab(lru->del_arg, node)) {
__bpf_lru_node_move_to_free(l, node, free_list,
tgt_free_type);
if (++nshrinked == tgt_nshrink)
list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list,
list) {
- if (lru->del_from_htab(lru->del_arg, node)) {
+ if (READ_ONCE(node->pending_free) ||
+ lru->del_from_htab(lru->del_arg, node)) {
__bpf_lru_node_move_to_free(l, node, free_list,
tgt_free_type);
return 1;
struct bpf_lru_node *node, *tmp_node;
list_for_each_entry_safe_reverse(node, tmp_node,
- local_pending_list(loc_l), list) {
- if (bpf_lru_node_is_ref(node))
+ &loc_l->pending_list, list) {
+ if (READ_ONCE(node->pending_free))
+ __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_FREE);
+ else if (bpf_lru_node_is_ref(node))
__bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE);
else
__bpf_lru_node_move_in(l, node,
if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
return;
- raw_spin_lock_irqsave(&l->lock, flags);
+ if (raw_res_spin_lock_irqsave(&l->lock, flags)) {
+ WRITE_ONCE(node->pending_free, 1);
+ return;
+ }
__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
- raw_spin_unlock_irqrestore(&l->lock, flags);
+ raw_res_spin_unlock_irqrestore(&l->lock, flags);
}
static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
struct bpf_lru_list *l = &lru->common_lru.lru_list;
struct bpf_lru_node *node, *tmp_node;
unsigned int nfree = 0;
+ LIST_HEAD(tmp_free);
- raw_spin_lock(&l->lock);
+ if (raw_res_spin_lock(&l->lock))
+ return;
__local_list_flush(l, loc_l);
list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE],
list) {
- __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l),
+ __bpf_lru_node_move_to_free(l, node, &tmp_free,
BPF_LRU_LOCAL_LIST_T_FREE);
if (++nfree == lru->target_free)
break;
if (nfree < lru->target_free)
__bpf_lru_list_shrink(lru, l, lru->target_free - nfree,
- local_free_list(loc_l),
+ &tmp_free,
BPF_LRU_LOCAL_LIST_T_FREE);
- raw_spin_unlock(&l->lock);
+ raw_res_spin_unlock(&l->lock);
+
+ /*
+ * Transfer the harvested nodes from the temporary list_head into
+ * the lockless per-CPU free llist.
+ */
+ list_for_each_entry_safe(node, tmp_node, &tmp_free, list) {
+ list_del(&node->list);
+ llist_add(&node->llist, &loc_l->free_llist);
+ }
}
static void __local_list_add_pending(struct bpf_lru *lru,
*(u32 *)((void *)node + lru->hash_offset) = hash;
node->cpu = cpu;
node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
+ WRITE_ONCE(node->pending_free, 0);
bpf_lru_node_clear_ref(node);
- list_add(&node->list, local_pending_list(loc_l));
+ list_add(&node->list, &loc_l->pending_list);
}
static struct bpf_lru_node *
__local_list_pop_free(struct bpf_lru_locallist *loc_l)
{
- struct bpf_lru_node *node;
+ struct llist_node *llnode;
- node = list_first_entry_or_null(local_free_list(loc_l),
- struct bpf_lru_node,
- list);
- if (node)
- list_del(&node->list);
+ llnode = llist_del_first(&loc_l->free_llist);
+ if (!llnode)
+ return NULL;
- return node;
+ return container_of(llnode, struct bpf_lru_node, llist);
}
static struct bpf_lru_node *
ignore_ref:
/* Get from the tail (i.e. older element) of the pending list. */
- list_for_each_entry_reverse(node, local_pending_list(loc_l),
- list) {
+ list_for_each_entry_reverse(node, &loc_l->pending_list, list) {
if ((!bpf_lru_node_is_ref(node) || force) &&
- lru->del_from_htab(lru->del_arg, node)) {
+ (READ_ONCE(node->pending_free) ||
+ lru->del_from_htab(lru->del_arg, node))) {
list_del(&node->list);
return node;
}
l = per_cpu_ptr(lru->percpu_lru, cpu);
- raw_spin_lock_irqsave(&l->lock, flags);
+ if (raw_res_spin_lock_irqsave(&l->lock, flags))
+ return NULL;
__bpf_lru_list_rotate(lru, l);
__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
}
- raw_spin_unlock_irqrestore(&l->lock, flags);
+ raw_res_spin_unlock_irqrestore(&l->lock, flags);
return node;
}
loc_l = per_cpu_ptr(clru->local_list, cpu);
- raw_spin_lock_irqsave(&loc_l->lock, flags);
+ if (raw_res_spin_lock_irqsave(&loc_l->lock, flags))
+ return NULL;
node = __local_list_pop_free(loc_l);
if (!node) {
if (node)
__local_list_add_pending(lru, loc_l, cpu, node, hash);
- raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+ raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
if (node)
return node;
- /* No free nodes found from the local free list and
+ /*
+ * No free nodes found from the local free list and
* the global LRU list.
*
* Steal from the local free/pending list of the
* current CPU and remote CPU in RR. It starts
* with the loc_l->next_steal CPU.
+ *
+ * Acquire the victim's lock before touching either list. If the
+ * acquisition fails (rqspinlock reports an AA deadlock or a
+ * timeout), skip the victim and try the next CPU.
*/
first_steal = loc_l->next_steal;
do {
steal_loc_l = per_cpu_ptr(clru->local_list, steal);
- raw_spin_lock_irqsave(&steal_loc_l->lock, flags);
-
- node = __local_list_pop_free(steal_loc_l);
- if (!node)
- node = __local_list_pop_pending(lru, steal_loc_l);
-
- raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
+ if (!raw_res_spin_lock_irqsave(&steal_loc_l->lock, flags)) {
+ node = __local_list_pop_free(steal_loc_l);
+ if (!node)
+ node = __local_list_pop_pending(lru, steal_loc_l);
+ raw_res_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
+ }
steal = cpumask_next_wrap(steal, cpu_possible_mask);
} while (!node && steal != first_steal);
loc_l->next_steal = steal;
- if (node) {
- raw_spin_lock_irqsave(&loc_l->lock, flags);
- __local_list_add_pending(lru, loc_l, cpu, node, hash);
- raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+ if (!node)
+ return NULL;
+
+ if (raw_res_spin_lock_irqsave(&loc_l->lock, flags)) {
+ /*
+ * This CPU's local-list lock can't be acquired (rqspinlock AA
+ * deadlock or timeout). Park the stolen node on this CPU's
+ * free_llist instead of orphaning it; a later pop_free on this
+ * list will pick it up.
+ */
+ node->type = BPF_LRU_LOCAL_LIST_T_FREE;
+ bpf_lru_node_clear_ref(node);
+ WRITE_ONCE(node->pending_free, 0);
+ llist_add(&node->llist, &loc_l->free_llist);
+ return NULL;
}
+ __local_list_add_pending(lru, loc_l, cpu, node, hash);
+ raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
return node;
}
loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
- raw_spin_lock_irqsave(&loc_l->lock, flags);
+ if (raw_res_spin_lock_irqsave(&loc_l->lock, flags)) {
+ WRITE_ONCE(node->pending_free, 1);
+ return;
+ }
if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
- raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+ raw_res_spin_unlock_irqrestore(&loc_l->lock,
+ flags);
goto check_lru_list;
}
node->type = BPF_LRU_LOCAL_LIST_T_FREE;
bpf_lru_node_clear_ref(node);
- list_move(&node->list, local_free_list(loc_l));
+ list_del(&node->list);
+
+ raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
- raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+ llist_add(&node->llist, &loc_l->free_llist);
return;
}
l = per_cpu_ptr(lru->percpu_lru, node->cpu);
- raw_spin_lock_irqsave(&l->lock, flags);
+ if (raw_res_spin_lock_irqsave(&l->lock, flags)) {
+ WRITE_ONCE(node->pending_free, 1);
+ return;
+ }
__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
- raw_spin_unlock_irqrestore(&l->lock, flags);
+ raw_res_spin_unlock_irqrestore(&l->lock, flags);
}
void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
node = (struct bpf_lru_node *)(buf + node_offset);
node->type = BPF_LRU_LIST_T_FREE;
+ node->pending_free = 0;
bpf_lru_node_clear_ref(node);
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
buf += elem_size;
node = (struct bpf_lru_node *)(buf + node_offset);
node->cpu = cpu;
node->type = BPF_LRU_LIST_T_FREE;
+ node->pending_free = 0;
bpf_lru_node_clear_ref(node);
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
i++;
static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
{
- int i;
-
- for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++)
- INIT_LIST_HEAD(&loc_l->lists[i]);
+ INIT_LIST_HEAD(&loc_l->pending_list);
+ init_llist_head(&loc_l->free_llist);
loc_l->next_steal = cpu;
- raw_spin_lock_init(&loc_l->lock);
+ raw_res_spin_lock_init(&loc_l->lock);
}
static void bpf_lru_list_init(struct bpf_lru_list *l)
l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
- raw_spin_lock_init(&l->lock);
+ raw_res_spin_lock_init(&l->lock);
}
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,