net: use llist for sd->defer_list
author     Eric Dumazet <edumazet@google.com>
           Sun, 28 Sep 2025 08:49:33 +0000 (08:49 +0000)
committer  Paolo Abeni <pabeni@redhat.com>
           Tue, 30 Sep 2025 13:45:53 +0000 (15:45 +0200)
Get rid of sd->defer_lock and adopt llist operations.

We optimize skb_attempt_defer_free() for the common case,
where the packet is queued. Otherwise sd->defer_count
keeps increasing until skb_defer_free_flush() clears it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250928084934.3266948-3-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/linux/netdevice.h
net/core/dev.c
net/core/skbuff.c
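
Before the diff, a minimal sketch of the lock-free pattern the patch adopts, for readers unfamiliar with the llist API: producers push nodes with llist_add(), and the consumer detaches the whole list at once with llist_del_all() before walking it with llist_for_each_entry_safe(). The item type and function names below are hypothetical; only the llist calls mirror the patch, which hangs sk_buffs off sd->defer_list via skb->ll_node.

#include <linux/llist.h>
#include <linux/slab.h>

/* Hypothetical item type; the patch itself uses struct sk_buff and its ll_node. */
struct deferred_item {
        struct llist_node ll_node;
        int payload;
};

static LLIST_HEAD(defer_list);

/* Producer side: safe to call concurrently from any CPU, no lock taken. */
static void defer_item(struct deferred_item *item)
{
        llist_add(&item->ll_node, &defer_list);
}

/* Consumer side: detach everything in one atomic xchg, then walk the batch. */
static void flush_items(void)
{
        struct llist_node *batch = llist_del_all(&defer_list);
        struct deferred_item *item, *next;

        llist_for_each_entry_safe(item, next, batch, ll_node)
                kfree(item);    /* items assumed kmalloc'ed by the producer */
}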

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 27e3fa69253f694b98d32b6138cf491da5a8b824..5c9aa16933d197f70746d64e5f44cae052d9971c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3537,10 +3537,10 @@ struct softnet_data {
        struct numa_drop_counters drop_counters;
 
        /* Another possibly contended cache line */
-       spinlock_t              defer_lock ____cacheline_aligned_in_smp;
-       atomic_t                defer_count;
-       int                     defer_ipi_scheduled;
-       struct sk_buff          *defer_list;
+       struct llist_head       defer_list ____cacheline_aligned_in_smp;
+       atomic_long_t           defer_count;
+
+       int                     defer_ipi_scheduled ____cacheline_aligned_in_smp;
        call_single_data_t      defer_csd;
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 8566678d83444e8aacbfea4842878279cf28516f..fb67372774de10b0b112ca71c7c7a13819c2325b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6717,22 +6717,16 @@ EXPORT_SYMBOL(napi_complete_done);
 
 static void skb_defer_free_flush(struct softnet_data *sd)
 {
+       struct llist_node *free_list;
        struct sk_buff *skb, *next;
 
-       /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
-       if (!READ_ONCE(sd->defer_list))
+       if (llist_empty(&sd->defer_list))
                return;
+       atomic_long_set(&sd->defer_count, 0);
+       free_list = llist_del_all(&sd->defer_list);
 
-       spin_lock(&sd->defer_lock);
-       skb = sd->defer_list;
-       sd->defer_list = NULL;
-       atomic_set(&sd->defer_count, 0);
-       spin_unlock(&sd->defer_lock);
-
-       while (skb != NULL) {
-               next = skb->next;
+       llist_for_each_entry_safe(skb, next, free_list, ll_node) {
                napi_consume_skb(skb, 1);
-               skb = next;
        }
 }
 
@@ -12995,7 +12989,7 @@ static int __init net_dev_init(void)
                sd->cpu = i;
 #endif
                INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
-               spin_lock_init(&sd->defer_lock);
+               init_llist_head(&sd->defer_list);
 
                gro_init(&sd->backlog.gro);
                sd->backlog.poll = process_backlog;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16cd357d62a6bdb13038e3a50b6d8eb3660bbc7f..17455fc1e6922f653744ce5013e7a8f7aabf2ef5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7185,6 +7185,7 @@ static void kfree_skb_napi_cache(struct sk_buff *skb)
  */
 void skb_attempt_defer_free(struct sk_buff *skb)
 {
+       unsigned long defer_count;
        int cpu = skb->alloc_cpu;
        struct softnet_data *sd;
        unsigned int defer_max;
@@ -7202,17 +7203,15 @@ nodefer:        kfree_skb_napi_cache(skb);
 
        sd = &per_cpu(softnet_data, cpu);
        defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
-       if (atomic_read(&sd->defer_count) >= defer_max)
+       defer_count = atomic_long_inc_return(&sd->defer_count);
+
+       if (defer_count >= defer_max)
                goto nodefer;
 
-       spin_lock_bh(&sd->defer_lock);
-       /* Send an IPI every time queue reaches half capacity. */
-       kick = (atomic_inc_return(&sd->defer_count) - 1) == (defer_max >> 1);
+       llist_add(&skb->ll_node, &sd->defer_list);
 
-       skb->next = sd->defer_list;
-       /* Paired with READ_ONCE() in skb_defer_free_flush() */
-       WRITE_ONCE(sd->defer_list, skb);
-       spin_unlock_bh(&sd->defer_lock);
+       /* Send an IPI every time queue reaches half capacity. */
+       kick = (defer_count - 1) == (defer_max >> 1);
 
        /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
         * if we are unlucky enough (this seems very unlikely).
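
A small userspace illustration of the kick arithmetic above (hypothetical values, not kernel code): since defer_count is the value returned by atomic_long_inc_return(), (defer_count - 1) is the number of skbs counted before this one since the last flush, so exactly one producer per flush cycle observes the half-capacity crossing and requests the IPI.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the kick condition from the hunk above, with userspace types. */
static bool should_kick(unsigned long defer_count, unsigned int defer_max)
{
        return (defer_count - 1) == (defer_max >> 1);
}

int main(void)
{
        /* With a hypothetical defer_max of 64, only the 33rd enqueue kicks. */
        for (unsigned long c = 1; c <= 64; c++)
                if (should_kick(c, 64))
                        printf("kick at defer_count=%lu\n", c);
        return 0;
}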