git.ipfire.org Git - thirdparty/linux.git/commitdiff
tcp: add cwnd_event_tx_start to tcp_congestion_ops
author: Eric Dumazet <edumazet@google.com>
Mon, 23 Mar 2026 23:49:20 +0000 (23:49 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Wed, 25 Mar 2026 04:00:38 +0000 (21:00 -0700)
(tcp_congestion_ops)->cwnd_event() is called very often, with
@event oscillating between CA_EVENT_TX_START and other values.

This is not branch prediction friendly.

Provide a new cwnd_event_tx_start pointer dedicated for CA_EVENT_TX_START.

Both BBR and CUBIC benefit from this change, since they only care
about CA_EVENT_TX_START.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 4/4 grow/shrink: 3/1 up/down: 564/-568 (-4)
Function                                     old     new   delta
bbr_cwnd_event_tx_start                        -     450    +450
cubictcp_cwnd_event_tx_start                   -      70     +70
__pfx_cubictcp_cwnd_event_tx_start             -      16     +16
__pfx_bbr_cwnd_event_tx_start                  -      16     +16
tcp_unregister_congestion_control             93      99      +6
tcp_update_congestion_control                518     521      +3
tcp_register_congestion_control              422     425      +3
__tcp_transmit_skb                          3308    3306      -2
__pfx_cubictcp_cwnd_event                     16       -     -16
__pfx_bbr_cwnd_event                          16       -     -16
cubictcp_cwnd_event                           80       -     -80
bbr_cwnd_event                               454       -    -454
Total: Before=25240512, After=25240508, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323234920.1097858-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
12 files changed:
include/net/tcp.h
net/ipv4/bpf_tcp_ca.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_vegas.c
net/ipv4/tcp_vegas.h
net/ipv4/tcp_veno.c
net/ipv4/tcp_yeah.c
tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
tools/testing/selftests/bpf/progs/bpf_cubic.c
tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f87bdacb5a6995422851e88cfb65734702c84093..39ff4cf3c810f5619a479dcd92192043374a1739 100644 (file)
@@ -1341,6 +1341,9 @@ struct tcp_congestion_ops {
        /* call when cwnd event occurs (optional) */
        void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
 
+       /* call when CA_EVENT_TX_START cwnd event occurs (optional) */
+       void (*cwnd_event_tx_start)(struct sock *sk);
+
        /* call when ack arrives (optional) */
        void (*in_ack_event)(struct sock *sk, u32 flags);
 
@@ -1440,6 +1443,11 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
 
+       if (event == CA_EVENT_TX_START) {
+           if (icsk->icsk_ca_ops->cwnd_event_tx_start)
+                       icsk->icsk_ca_ops->cwnd_event_tx_start(sk);
+               return;
+       }
        if (icsk->icsk_ca_ops->cwnd_event)
                icsk->icsk_ca_ops->cwnd_event(sk, event);
 }
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index e01492234b0b349498f8e56c2f07d8bba092c00e..008edc7f6688523dc86963d90485655e9fa8374e 100644 (file)
@@ -272,6 +272,10 @@ static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
 }
 
+static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk)
+{
+}
+
 static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
 {
 }
@@ -313,6 +317,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
        .cong_avoid = bpf_tcp_ca_cong_avoid,
        .set_state = bpf_tcp_ca_set_state,
        .cwnd_event = bpf_tcp_ca_cwnd_event,
+       .cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start,
        .in_ack_event = bpf_tcp_ca_in_ack_event,
        .pkts_acked = bpf_tcp_ca_pkts_acked,
        .min_tso_segs = bpf_tcp_ca_min_tso_segs,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 05d52372ca8fb068530a9b3379f1ae0f9d0b362a..1ddc20a399b07054f8175b5f6459f8ae6dbf34bb 100644 (file)
@@ -330,12 +330,12 @@ static void bbr_save_cwnd(struct sock *sk)
                bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
 }
 
-__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct bbr *bbr = inet_csk_ca(sk);
 
-       if (event == CA_EVENT_TX_START && tp->app_limited) {
+       if (tp->app_limited) {
                bbr->idle_restart = 1;
                bbr->ack_epoch_mstamp = tp->tcp_mstamp;
                bbr->ack_epoch_acked = 0;
@@ -1149,7 +1149,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .cong_control   = bbr_main,
        .sndbuf_expand  = bbr_sndbuf_expand,
        .undo_cwnd      = bbr_undo_cwnd,
-       .cwnd_event     = bbr_cwnd_event,
+       .cwnd_event_tx_start    = bbr_cwnd_event_tx_start,
        .ssthresh       = bbr_ssthresh,
        .min_tso_segs   = bbr_min_tso_segs,
        .get_info       = bbr_get_info,
@@ -1161,7 +1161,7 @@ BTF_ID_FLAGS(func, bbr_init)
 BTF_ID_FLAGS(func, bbr_main)
 BTF_ID_FLAGS(func, bbr_sndbuf_expand)
 BTF_ID_FLAGS(func, bbr_undo_cwnd)
-BTF_ID_FLAGS(func, bbr_cwnd_event)
+BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start)
 BTF_ID_FLAGS(func, bbr_ssthresh)
 BTF_ID_FLAGS(func, bbr_min_tso_segs)
 BTF_ID_FLAGS(func, bbr_set_state)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 76c23675ae50ab50c977dc1de79a3df57db98ef6..ab78b5ae8d0e3d13a39bd1adf1e105b84f806b63 100644 (file)
@@ -139,24 +139,21 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk)
                tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk)
 {
-       if (event == CA_EVENT_TX_START) {
-               struct bictcp *ca = inet_csk_ca(sk);
-               u32 now = tcp_jiffies32;
-               s32 delta;
-
-               delta = now - tcp_sk(sk)->lsndtime;
-
-               /* We were application limited (idle) for a while.
-                * Shift epoch_start to keep cwnd growth to cubic curve.
-                */
-               if (ca->epoch_start && delta > 0) {
-                       ca->epoch_start += delta;
-                       if (after(ca->epoch_start, now))
-                               ca->epoch_start = now;
-               }
-               return;
+       struct bictcp *ca = inet_csk_ca(sk);
+       u32 now = tcp_jiffies32;
+       s32 delta;
+
+       delta = now - tcp_sk(sk)->lsndtime;
+
+       /* We were application limited (idle) for a while.
+        * Shift epoch_start to keep cwnd growth to cubic curve.
+        */
+       if (ca->epoch_start && delta > 0) {
+               ca->epoch_start += delta;
+               if (after(ca->epoch_start, now))
+                       ca->epoch_start = now;
        }
 }
 
@@ -481,7 +478,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
        .cong_avoid     = cubictcp_cong_avoid,
        .set_state      = cubictcp_state,
        .undo_cwnd      = tcp_reno_undo_cwnd,
-       .cwnd_event     = cubictcp_cwnd_event,
+       .cwnd_event_tx_start = cubictcp_cwnd_event_tx_start,
        .pkts_acked     = cubictcp_acked,
        .owner          = THIS_MODULE,
        .name           = "cubic",
@@ -492,7 +489,7 @@ BTF_ID_FLAGS(func, cubictcp_init)
 BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
 BTF_ID_FLAGS(func, cubictcp_cong_avoid)
 BTF_ID_FLAGS(func, cubictcp_state)
-BTF_ID_FLAGS(func, cubictcp_cwnd_event)
+BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start)
 BTF_ID_FLAGS(func, cubictcp_acked)
 BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)
 
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 03abe0848420d7fb7b514e6ad24f4c702954ab14..96c99999e09dde9de9c337e4d6c692f517467c7b 100644 (file)
@@ -203,15 +203,19 @@ __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
                tcp_plb_update_state_upon_rto(sk, &ca->plb);
                dctcp_react_to_loss(sk);
                break;
-       case CA_EVENT_TX_START:
-               tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
-               break;
        default:
                /* Don't care for the rest. */
                break;
        }
 }
 
+__bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk)
+{
+       struct dctcp *ca = inet_csk_ca(sk);
+
+       tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
+}
+
 static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
                             union tcp_cc_info *info)
 {
@@ -252,6 +256,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
        .init           = dctcp_init,
        .in_ack_event   = dctcp_update_alpha,
        .cwnd_event     = dctcp_cwnd_event,
+       .cwnd_event_tx_start = dctcp_cwnd_event_tx_start,
        .ssthresh       = dctcp_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
        .undo_cwnd      = dctcp_cwnd_undo,
@@ -275,6 +280,7 @@ BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
 BTF_ID_FLAGS(func, dctcp_init)
 BTF_ID_FLAGS(func, dctcp_update_alpha)
 BTF_ID_FLAGS(func, dctcp_cwnd_event)
+BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start)
 BTF_ID_FLAGS(func, dctcp_ssthresh)
 BTF_ID_FLAGS(func, dctcp_cwnd_undo)
 BTF_ID_FLAGS(func, dctcp_state)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 786848ad37ea8d5f9bd817666181905f3f6ec9d4..cf12fb6be079d8ccd65f297a3a90a9a5e90036e8 100644 (file)
@@ -151,12 +151,17 @@ EXPORT_SYMBOL_GPL(tcp_vegas_state);
  */
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
-       if (event == CA_EVENT_CWND_RESTART ||
-           event == CA_EVENT_TX_START)
+       if (event == CA_EVENT_CWND_RESTART)
                tcp_vegas_init(sk);
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
+void tcp_vegas_cwnd_event_tx_start(struct sock *sk)
+{
+       tcp_vegas_init(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event_tx_start);
+
 static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
 {
        return  min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 4f24d0e37d9c1e9d5336e8e99253e660143362c9..602af8e600c7f85603ff1a53008e1144617ea14b 100644 (file)
@@ -20,6 +20,7 @@ void tcp_vegas_init(struct sock *sk);
 void tcp_vegas_state(struct sock *sk, u8 ca_state);
 void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample);
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+void tcp_vegas_cwnd_event_tx_start(struct sock *sk);
 size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
                          union tcp_cc_info *info);
 
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 366ff6f214b2ee746cecdf10353e4624252478e0..1b2e1b947901f3ca6004aced8e6d39c131f1fb68 100644 (file)
@@ -112,10 +112,15 @@ static void tcp_veno_state(struct sock *sk, u8 ca_state)
  */
 static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
-       if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
+       if (event == CA_EVENT_CWND_RESTART)
                tcp_veno_init(sk);
 }
 
+static void tcp_veno_cwnd_event_tx_start(struct sock *sk)
+{
+       tcp_veno_init(sk);
+}
+
 static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
        struct tcp_sock *tp = tcp_sk(sk);
@@ -213,6 +218,7 @@ static struct tcp_congestion_ops tcp_veno __read_mostly = {
        .pkts_acked     = tcp_veno_pkts_acked,
        .set_state      = tcp_veno_state,
        .cwnd_event     = tcp_veno_cwnd_event,
+       .cwnd_event_tx_start = tcp_veno_cwnd_event_tx_start,
 
        .owner          = THIS_MODULE,
        .name           = "veno",
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 18b07ff5d20e6c5eefe9ab54c6b8e429f01d64b9..b22b3dccd05efddfe11203578950eddecee14887 100644 (file)
@@ -212,6 +212,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
        .cong_avoid     = tcp_yeah_cong_avoid,
        .set_state      = tcp_vegas_state,
        .cwnd_event     = tcp_vegas_cwnd_event,
+       .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start,
        .get_info       = tcp_vegas_get_info,
        .pkts_acked     = tcp_vegas_pkts_acked,
 
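diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 9af19dfe4e80b00bc09fe40c7d6c9642cdfb4a23..bccf677b94b61226a3dd0517e0b2e64168de711f 100644 (file)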
@@ -23,7 +23,7 @@
 #define TCP_REORDERING (12)
 
 extern void cubictcp_init(struct sock *sk) __ksym;
-extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym;
 extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
 extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym;
 extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
@@ -108,9 +108,9 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
 }
 
 SEC("struct_ops")
-void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk)
 {
-       cubictcp_cwnd_event(sk, event);
+       cubictcp_cwnd_event_tx_start(sk);
 }
 
 SEC("struct_ops")
@@ -172,7 +172,7 @@ struct tcp_congestion_ops cc_cubic = {
        .cong_control   = (void *)bpf_cubic_cong_control,
        .set_state      = (void *)bpf_cubic_state,
        .undo_cwnd      = (void *)bpf_cubic_undo_cwnd,
-       .cwnd_event     = (void *)bpf_cubic_cwnd_event,
+       .cwnd_event_tx_start    = (void *)bpf_cubic_cwnd_event_tx_start,
        .pkts_acked     = (void *)bpf_cubic_acked,
        .name           = "bpf_cc_cubic",
 };
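diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 46fb2b37d3a70671485af37ed700a991c0975cd2..ce18a4db813fab877aec0c73d72cc9c958312ad3 100644 (file)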
@@ -185,24 +185,21 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
 }
 
 SEC("struct_ops")
-void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk)
 {
-       if (event == CA_EVENT_TX_START) {
-               struct bpf_bictcp *ca = inet_csk_ca(sk);
-               __u32 now = tcp_jiffies32;
-               __s32 delta;
-
-               delta = now - tcp_sk(sk)->lsndtime;
-
-               /* We were application limited (idle) for a while.
-                * Shift epoch_start to keep cwnd growth to cubic curve.
-                */
-               if (ca->epoch_start && delta > 0) {
-                       ca->epoch_start += delta;
-                       if (after(ca->epoch_start, now))
-                               ca->epoch_start = now;
-               }
-               return;
+       struct bpf_bictcp *ca = inet_csk_ca(sk);
+       __u32 now = tcp_jiffies32;
+       __s32 delta;
+
+       delta = now - tcp_sk(sk)->lsndtime;
+
+       /* We were application limited (idle) for a while.
+        * Shift epoch_start to keep cwnd growth to cubic curve.
+        */
+       if (ca->epoch_start && delta > 0) {
+               ca->epoch_start += delta;
+               if (after(ca->epoch_start, now))
+                       ca->epoch_start = now;
        }
 }
 
@@ -537,7 +534,7 @@ struct tcp_congestion_ops cubic = {
        .cong_avoid     = (void *)bpf_cubic_cong_avoid,
        .set_state      = (void *)bpf_cubic_state,
        .undo_cwnd      = (void *)bpf_cubic_undo_cwnd,
-       .cwnd_event     = (void *)bpf_cubic_cwnd_event,
+       .cwnd_event_tx_start    = (void *)bpf_cubic_cwnd_event_tx_start,
        .pkts_acked     = (void *)bpf_cubic_acked,
        .name           = "bpf_cubic",
 };
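diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
index f95862f570b73f4c86fa6d7b6a912c4b055d02fb..0a3e9d35bf6f0c4c988c741c010fb7f3405fb43c 100644 (file)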
@@ -8,7 +8,7 @@ extern void bbr_init(struct sock *sk) __ksym;
 extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym;
 extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym;
 extern u32 bbr_undo_cwnd(struct sock *sk) __ksym;
-extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void bbr_cwnd_event_tx_start(struct sock *sk) __ksym;
 extern u32 bbr_ssthresh(struct sock *sk) __ksym;
 extern u32 bbr_min_tso_segs(struct sock *sk) __ksym;
 extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym;
@@ -16,6 +16,7 @@ extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym;
 extern void dctcp_init(struct sock *sk) __ksym;
 extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym;
 extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym;
+extern void dctcp_cwnd_event_tx_start(struct sock *sk) __ksym;
 extern u32 dctcp_ssthresh(struct sock *sk) __ksym;
 extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym;
 extern void dctcp_state(struct sock *sk, u8 new_state) __ksym;
@@ -24,7 +25,7 @@ extern void cubictcp_init(struct sock *sk) __ksym;
 extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
 extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym;
 extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
-extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym;
 extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
 
 SEC("struct_ops")
@@ -69,9 +70,15 @@ u32 BPF_PROG(undo_cwnd, struct sock *sk)
 SEC("struct_ops")
 void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
-       bbr_cwnd_event(sk, event);
        dctcp_cwnd_event(sk, event);
-       cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cwnd_event_tx_start, struct sock *sk)
+{
+       bbr_cwnd_event_tx_start(sk);
+       dctcp_cwnd_event_tx_start(sk);
+       cubictcp_cwnd_event_tx_start(sk);
 }
 
 SEC("struct_ops")
@@ -111,6 +118,7 @@ struct tcp_congestion_ops tcp_ca_kfunc = {
        .sndbuf_expand  = (void *)sndbuf_expand,
        .undo_cwnd      = (void *)undo_cwnd,
        .cwnd_event     = (void *)cwnd_event,
+       .cwnd_event_tx_start = (void *)cwnd_event_tx_start,
        .ssthresh       = (void *)ssthresh,
        .min_tso_segs   = (void *)min_tso_segs,
        .set_state      = (void *)set_state,