net: add NUMA awareness to skb_attempt_defer_free()
author     Eric Dumazet <edumazet@google.com>
           Sun, 28 Sep 2025 08:49:34 +0000 (08:49 +0000)
committer  Paolo Abeni <pabeni@redhat.com>
           Tue, 30 Sep 2025 13:45:53 +0000 (15:45 +0200)
Instead of sharing sd->defer_list & sd->defer_count with
many cpus, add one pair for each NUMA node.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
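
A minimal user-space sketch (not kernel code) of the layout this change introduces: one (defer_list, defer_count) bucket per (cpu, NUMA node) pair, filled by remote CPUs and drained by the owning CPU. The demo topology constants, struct skb_stub and the plain arrays are illustrative stand-ins; the kernel uses a __alloc_percpu() area, lockless llist_add()/llist_del_all() and atomic_long_t, as the hunks below show.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS_DEMO	4	/* assumed demo topology */
#define NR_NODES_DEMO	2

struct skb_stub {			/* stand-in for struct sk_buff */
	struct skb_stub	*next;		/* kernel: skb->ll_node */
	int		alloc_cpu;	/* kernel: skb->alloc_cpu */
};

struct skb_defer_node_demo {		/* mirrors struct skb_defer_node */
	struct skb_stub	*defer_list;	/* kernel: struct llist_head */
	atomic_long	defer_count;	/* kernel: atomic_long_t */
};

/* Kernel equivalent: net_hotdata.skb_defer_nodes, a per-CPU array of
 * nr_node_ids cacheline-aligned buckets.
 */
static struct skb_defer_node_demo defer_nodes[NR_CPUS_DEMO][NR_NODES_DEMO];

/* Producer path, as in skb_attempt_defer_free(): the freeing CPU, sitting on
 * @local_node, targets the bucket (skb->alloc_cpu, local_node), so CPUs on
 * different nodes write to different cache lines.
 */
static void defer_free(struct skb_stub *skb, int local_node)
{
	struct skb_defer_node_demo *sdn =
		&defer_nodes[skb->alloc_cpu][local_node];

	atomic_fetch_add(&sdn->defer_count, 1);
	skb->next = sdn->defer_list;	/* kernel: lockless llist_add() */
	sdn->defer_list = skb;		/* (this demo is single-threaded) */
}

/* Consumer path, as in skb_defer_free_flush(): the owning CPU walks every
 * node bucket of its own per-CPU slice.
 */
static void flush(int cpu)
{
	for (int node = 0; node < NR_NODES_DEMO; node++) {
		struct skb_defer_node_demo *sdn = &defer_nodes[cpu][node];
		struct skb_stub *skb = sdn->defer_list;

		atomic_store(&sdn->defer_count, 0);
		sdn->defer_list = NULL;	/* kernel: llist_del_all() */
		while (skb) {
			struct skb_stub *next = skb->next;

			printf("cpu%d: freeing skb queued via node %d\n",
			       cpu, node);
			free(skb);
			skb = next;
		}
	}
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		struct skb_stub *skb = calloc(1, sizeof(*skb));

		skb->alloc_cpu = 1;			/* owned by CPU 1 */
		defer_free(skb, i % NR_NODES_DEMO);	/* freed from node 0/1 */
	}
	flush(1);	/* CPU 1 drains both of its node buckets */
	return 0;
}

In the patch itself the producer picks its bucket with per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id(), and each bucket is ____cacheline_aligned_in_smp, so deferred frees issued from different nodes no longer dirty the same cache line of the target CPU.
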
include/linux/netdevice.h
include/net/hotdata.h
net/core/dev.c
net/core/dev.h
net/core/skbuff.c

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5c9aa16933d197f70746d64e5f44cae052d9971c..d1a687444b275d45d105e336d2ede264fd310f1b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3536,10 +3536,6 @@ struct softnet_data {
 
        struct numa_drop_counters drop_counters;
 
-       /* Another possibly contended cache line */
-       struct llist_head       defer_list ____cacheline_aligned_in_smp;
-       atomic_long_t           defer_count;
-
        int                     defer_ipi_scheduled ____cacheline_aligned_in_smp;
        call_single_data_t      defer_csd;
 };
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index fda94b2647ffa242c256c95ae929d9ef25e54f96..4acec191c54ab367ca12fff590d1f8c8aad64651 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -2,10 +2,16 @@
 #ifndef _NET_HOTDATA_H
 #define _NET_HOTDATA_H
 
+#include <linux/llist.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 
+struct skb_defer_node {
+       struct llist_head       defer_list;
+       atomic_long_t           defer_count;
+} ____cacheline_aligned_in_smp;
+
 /* Read mostly data used in network fast paths. */
 struct net_hotdata {
 #if IS_ENABLED(CONFIG_INET)
@@ -30,6 +36,7 @@ struct net_hotdata {
        struct rps_sock_flow_table __rcu *rps_sock_flow_table;
        u32                     rps_cpu_mask;
 #endif
+       struct skb_defer_node __percpu *skb_defer_nodes;
        int                     gro_normal_batch;
        int                     netdev_budget;
        int                     netdev_budget_usecs;
diff --git a/net/core/dev.c b/net/core/dev.c
index fb67372774de10b0b112ca71c7c7a13819c2325b..a64cef2c537e98ee87776e6f8d3ca3d98f0711b3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5180,8 +5180,9 @@ static void napi_schedule_rps(struct softnet_data *sd)
        __napi_schedule_irqoff(&mysd->backlog);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
+void kick_defer_list_purge(unsigned int cpu)
 {
+       struct softnet_data *sd = &per_cpu(softnet_data, cpu);
        unsigned long flags;
 
        if (use_backlog_threads()) {
@@ -6715,18 +6716,24 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 }
 EXPORT_SYMBOL(napi_complete_done);
 
-static void skb_defer_free_flush(struct softnet_data *sd)
+static void skb_defer_free_flush(void)
 {
        struct llist_node *free_list;
        struct sk_buff *skb, *next;
+       struct skb_defer_node *sdn;
+       int node;
 
-       if (llist_empty(&sd->defer_list))
-               return;
-       atomic_long_set(&sd->defer_count, 0);
-       free_list = llist_del_all(&sd->defer_list);
+       for_each_node(node) {
+               sdn = this_cpu_ptr(net_hotdata.skb_defer_nodes) + node;
+
+               if (llist_empty(&sdn->defer_list))
+                       continue;
+               atomic_long_set(&sdn->defer_count, 0);
+               free_list = llist_del_all(&sdn->defer_list);
 
-       llist_for_each_entry_safe(skb, next, free_list, ll_node) {
-               napi_consume_skb(skb, 1);
+               llist_for_each_entry_safe(skb, next, free_list, ll_node) {
+                       napi_consume_skb(skb, 1);
+               }
        }
 }
 
@@ -6854,7 +6861,7 @@ count:
                if (work > 0)
                        __NET_ADD_STATS(dev_net(napi->dev),
                                        LINUX_MIB_BUSYPOLLRXPACKETS, work);
-               skb_defer_free_flush(this_cpu_ptr(&softnet_data));
+               skb_defer_free_flush();
                bpf_net_ctx_clear(bpf_net_ctx);
                local_bh_enable();
 
@@ -7713,7 +7720,7 @@ static void napi_threaded_poll_loop(struct napi_struct *napi)
                        local_irq_disable();
                        net_rps_action_and_irq_enable(sd);
                }
-               skb_defer_free_flush(sd);
+               skb_defer_free_flush();
                bpf_net_ctx_clear(bpf_net_ctx);
                local_bh_enable();
 
@@ -7755,7 +7762,7 @@ start:
        for (;;) {
                struct napi_struct *n;
 
-               skb_defer_free_flush(sd);
+               skb_defer_free_flush();
 
                if (list_empty(&list)) {
                        if (list_empty(&repoll)) {
@@ -12989,7 +12996,6 @@ static int __init net_dev_init(void)
                sd->cpu = i;
 #endif
                INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
-               init_llist_head(&sd->defer_list);
 
                gro_init(&sd->backlog.gro);
                sd->backlog.poll = process_backlog;
@@ -12999,6 +13005,11 @@ static int __init net_dev_init(void)
                if (net_page_pool_create(i))
                        goto out;
        }
+       net_hotdata.skb_defer_nodes =
+                __alloc_percpu(sizeof(struct skb_defer_node) * nr_node_ids,
+                               __alignof__(struct skb_defer_node));
+       if (!net_hotdata.skb_defer_nodes)
+               goto out;
        if (use_backlog_threads())
                smpboot_register_percpu_thread(&backlog_threads);
 
diff --git a/net/core/dev.h b/net/core/dev.h
index d6b08d435479b2ba476b1ddeeaae1dce6ac875a2..900880e8b5b4b9492eca23a4d9201045e6bf7f74 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -357,7 +357,7 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi)
        WARN_ON(READ_ONCE(napi->list_owner) != -1);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+void kick_defer_list_purge(unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT   8
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17455fc1e6922f653744ce5013e7a8f7aabf2ef5..bc12790017b0b5c0be99f8fb9d362b3730fa4eb0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7185,9 +7185,9 @@ static void kfree_skb_napi_cache(struct sk_buff *skb)
  */
 void skb_attempt_defer_free(struct sk_buff *skb)
 {
+       struct skb_defer_node *sdn;
        unsigned long defer_count;
        int cpu = skb->alloc_cpu;
-       struct softnet_data *sd;
        unsigned int defer_max;
        bool kick;
 
@@ -7201,14 +7201,15 @@ nodefer:        kfree_skb_napi_cache(skb);
        DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
        DEBUG_NET_WARN_ON_ONCE(skb->destructor);
 
-       sd = &per_cpu(softnet_data, cpu);
+       sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
+
        defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
-       defer_count = atomic_long_inc_return(&sd->defer_count);
+       defer_count = atomic_long_inc_return(&sdn->defer_count);
 
        if (defer_count >= defer_max)
                goto nodefer;
 
-       llist_add(&skb->ll_node, &sd->defer_list);
+       llist_add(&skb->ll_node, &sdn->defer_list);
 
        /* Send an IPI every time queue reaches half capacity. */
        kick = (defer_count - 1) == (defer_max >> 1);
@@ -7217,7 +7218,7 @@ nodefer:  kfree_skb_napi_cache(skb);
         * if we are unlucky enough (this seems very unlikely).
         */
        if (unlikely(kick))
-               kick_defer_list_purge(sd, cpu);
+               kick_defer_list_purge(cpu);
 }
 
 static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
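
For scale: the percpu area allocated in net_dev_init() above holds nr_node_ids cacheline-aligned struct skb_defer_node buckets per possible CPU. A rough back-of-the-envelope sketch follows; the cache-line size and topology values are assumptions chosen for illustration, not figures from the patch.

#include <stdio.h>

int main(void)
{
	/* ____cacheline_aligned_in_smp pads each bucket to one cache line
	 * (its two 8-byte members fit well within it).
	 */
	unsigned int cache_line  = 64;	/* assumed SMP_CACHE_BYTES */
	unsigned int nr_cpu_ids  = 64;	/* example: 64 possible CPUs */
	unsigned int nr_node_ids = 4;	/* example: 4 NUMA nodes */

	printf("per-CPU slice : %u bytes\n", nr_node_ids * cache_line);
	printf("total percpu  : %u bytes\n",
	       nr_cpu_ids * nr_node_ids * cache_line);
	return 0;
}

With these example numbers that is 256 bytes per CPU and 16 KiB overall, a modest cost for keeping deferred frees issued from different NUMA nodes off a single contended cache line.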