net_sched: add Qdisc_read_mostly and Qdisc_write groups
author     Eric Dumazet <edumazet@google.com>
           Fri, 21 Nov 2025 08:32:49 +0000 (08:32 +0000)
committer  Paolo Abeni <pabeni@redhat.com>
           Tue, 25 Nov 2025 15:10:32 +0000 (16:10 +0100)
It is possible to reorganize struct Qdisc to avoid always dirtying two cache
lines in the fast path, reducing this to a single dirtied cache line.

In the current layout, we change only four (six for some Qdiscs) fields in the first cache line:
 - q.spinlock
 - q.qlen
 - bstats.bytes
 - bstats.packets
 - some Qdisc also change q.next/q.prev

In the second cache line, the fast path changes (see the helper sketch after this list):
 - running
 - state
 - qstats.backlog
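
These per-packet writes come from the standard helpers in
include/net/sch_generic.h. As a rough illustration only (simplified from the
upstream helpers, which may differ in detail), the stores touching these
fields look like:

	static inline void qdisc_bstats_update(struct Qdisc *sch,
					       const struct sk_buff *skb)
	{
		/* dirties bstats.bytes and bstats.packets */
		bstats_update(&sch->bstats, skb);
	}

	static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
						    const struct sk_buff *skb)
	{
		/* dirties qstats.backlog */
		sch->qstats.backlog -= qdisc_pkt_len(skb);
	}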

        /* --- cacheline 2 boundary (128 bytes) --- */
        struct sk_buff_head        gso_skb __attribute__((__aligned__(64))); /*  0x80  0x18 */
        struct qdisc_skb_head      q;                    /*  0x98  0x18 */
        struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /*  0xb0  0x10 */

        /* --- cacheline 3 boundary (192 bytes) --- */
        struct gnet_stats_queue    qstats;               /*  0xc0  0x14 */
        bool                       running;              /*  0xd4   0x1 */

        /* XXX 3 bytes hole, try to pack */

        unsigned long              state;                /*  0xd8   0x8 */
        struct Qdisc *             next_sched;           /*  0xe0   0x8 */
        struct sk_buff_head        skb_bad_txq;          /*  0xe8  0x18 */
        /* --- cacheline 4 boundary (256 bytes) --- */

Reorganize things so that the first cache line is mostly read,
followed by a mostly written one.

This gives a ~3% performance increase under tx stress.

Note that there is an additional hole because @qstats now spans over a third cache line.

/* --- cacheline 2 boundary (128 bytes) --- */
__u8                       __cacheline_group_begin__Qdisc_read_mostly[0] __attribute__((__aligned__(64))); /*  0x80     0 */
struct sk_buff_head        gso_skb;              /*  0x80  0x18 */
struct Qdisc *             next_sched;           /*  0x98   0x8 */
struct sk_buff_head        skb_bad_txq;          /*  0xa0  0x18 */
__u8                       __cacheline_group_end__Qdisc_read_mostly[0]; /*  0xb8     0 */

/* XXX 8 bytes hole, try to pack */

/* --- cacheline 3 boundary (192 bytes) --- */
__u8                       __cacheline_group_begin__Qdisc_write[0] __attribute__((__aligned__(64))); /*  0xc0     0 */
struct qdisc_skb_head      q;                    /*  0xc0  0x18 */
unsigned long              state;                /*  0xd8   0x8 */
struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /*  0xe0  0x10 */
bool                       running;              /*  0xf0   0x1 */

/* XXX 3 bytes hole, try to pack */

struct gnet_stats_queue    qstats;               /*  0xf4  0x14 */
/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
__u8                       __cacheline_group_end__Qdisc_write[0]; /* 0x108     0 */

/* XXX 56 bytes hole, try to pack */
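
The zero-sized __cacheline_group_begin__*/__cacheline_group_end__* markers
visible in the dump come from the group helpers in include/linux/cache.h.
As a rough sketch (assuming the current upstream definitions, which may
differ slightly in detail), they expand to empty marker arrays bracketing
the group:

	#define __cacheline_group_begin(GROUP) \
		__u8 __cacheline_group_begin__##GROUP[0]

	#define __cacheline_group_end(GROUP) \
		__u8 __cacheline_group_end__##GROUP[0]

pahole then reports these markers at the group boundaries, which makes it
easy to see which fields ended up in which group.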

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251121083256.674562-8-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/sch_generic.h

index cdf7a58ebcf5ef2b5f8b76eb6fbe92d5f0e07899..79501499dafba56271b9ebd97a8f379ffdc83cac 100644 (file)
@@ -103,17 +103,24 @@ struct Qdisc {
        int                     pad;
        refcount_t              refcnt;
 
-       /*
-        * For performance sake on SMP, we put highly modified fields at the end
-        */
-       struct sk_buff_head     gso_skb ____cacheline_aligned_in_smp;
-       struct qdisc_skb_head   q;
-       struct gnet_stats_basic_sync bstats;
-       struct gnet_stats_queue qstats;
-       bool                    running; /* must be written under qdisc spinlock */
-       unsigned long           state;
-       struct Qdisc            *next_sched;
-       struct sk_buff_head     skb_bad_txq;
+       /* Cache line potentially dirtied in dequeue() or __netif_reschedule(). */
+       __cacheline_group_begin(Qdisc_read_mostly) ____cacheline_aligned;
+               struct sk_buff_head     gso_skb;
+               struct Qdisc            *next_sched;
+               struct sk_buff_head     skb_bad_txq;
+       __cacheline_group_end(Qdisc_read_mostly);
+
+       /* Fields dirtied in dequeue() fast path. */
+       __cacheline_group_begin(Qdisc_write) ____cacheline_aligned;
+               struct qdisc_skb_head   q;
+               unsigned long           state;
+               struct gnet_stats_basic_sync bstats;
+               bool                    running; /* must be written under qdisc spinlock */
+
+               /* Note : we only change qstats.backlog in fast path. */
+               struct gnet_stats_queue qstats;
+       __cacheline_group_end(Qdisc_write);
+
 
        atomic_long_t           defer_count ____cacheline_aligned_in_smp;
        struct llist_head       defer_list;
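
Not part of this patch: with the fields now bracketed by named groups, a
build-time membership check in the spirit of the existing netdev group
asserts in net/core/dev.c becomes possible. A hypothetical sketch, assuming
the CACHELINE_ASSERT_GROUP_MEMBER() helper from include/linux/cache.h
(qdisc_struct_check() is an illustrative name, not an existing function):

	static void __used qdisc_struct_check(void)
	{
		/* Fast-path fields must stay inside the Qdisc_write group. */
		CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, q);
		CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, state);
		CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, bstats);
		CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, running);
		CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, qstats);
	}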