git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: rps: softnet_data reorg to make enqueue_to_backlog() fast
author: Eric Dumazet <edumazet@google.com>
Fri, 24 Oct 2025 09:12:40 +0000 (09:12 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Wed, 29 Oct 2025 00:41:17 +0000 (17:41 -0700)
enqueue_to_backlog() is showing up in kernel profiles on hosts
with many cores, when RFS/RPS is used.

The following softnet_data fields need to be updated:

- input_queue_tail
- input_pkt_queue (next, prev, qlen, lock)
- backlog.state (if input_pkt_queue was empty)

Unfortunately they are currently using two cache lines:

/* --- cacheline 3 boundary (192 bytes) --- */
call_single_data_t         csd __attribute__((__aligned__(64))); /*  0xc0  0x20 */
struct softnet_data *      rps_ipi_next;         /*  0xe0   0x8 */
unsigned int               cpu;                  /*  0xe8   0x4 */
unsigned int               input_queue_tail;     /*  0xec   0x4 */
struct sk_buff_head        input_pkt_queue;      /*  0xf0  0x18 */

/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */

struct napi_struct         backlog __attribute__((__aligned__(8))); /* 0x108 0x1f0 */

Add one ____cacheline_aligned_in_smp to make sure they now are using
a single cache line.

Also, because napi_struct has written fields, make @state its first field.

We want to make sure that cpus adding packets to sd->input_pkt_queue
are not slowing down cpus processing their backlog because of
false sharing.

After this patch new layout is:

/* --- cacheline 5 boundary (320 bytes) --- */
long int                   pad[3] __attribute__((__aligned__(64))); /* 0x140  0x18 */
unsigned int               input_queue_tail;     /* 0x158   0x4 */

/* XXX 4 bytes hole, try to pack */

struct sk_buff_head        input_pkt_queue;      /* 0x160  0x18 */
struct napi_struct         backlog __attribute__((__aligned__(8))); /* 0x178 0x1f0 */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251024091240.3292546-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/netdevice.h

index 7f5aad5cc9a1994f95ba9037d3a4af27eef9d5e3..9c1e5042c5e7646c0aa9e8f4e160c78ea27a639a 100644 (file)
@@ -377,6 +377,8 @@ struct napi_config {
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
 struct napi_struct {
+       /* This field should be first or softnet_data.backlog needs tweaks. */
+       unsigned long           state;
        /* The poll_list must only be managed by the entity which
         * changes the state of the NAPI_STATE_SCHED bit.  This means
         * whoever atomically sets that bit can add this napi_struct
@@ -385,7 +387,6 @@ struct napi_struct {
         */
        struct list_head        poll_list;
 
-       unsigned long           state;
        int                     weight;
        u32                     defer_hard_irqs_count;
        int                     (*poll)(struct napi_struct *, int);
@@ -3529,9 +3530,17 @@ struct softnet_data {
        call_single_data_t      csd ____cacheline_aligned_in_smp;
        struct softnet_data     *rps_ipi_next;
        unsigned int            cpu;
+
+       /* We force a cacheline alignment from here, to hold together
+        * input_queue_tail, input_pkt_queue and backlog.state.
+        * We add holes so that backlog.state is the last field
+        * of this cache line.
+        */
+       long                    pad[3] ____cacheline_aligned_in_smp;
        unsigned int            input_queue_tail;
 #endif
        struct sk_buff_head     input_pkt_queue;
+
        struct napi_struct      backlog;
 
        struct numa_drop_counters drop_counters;