tcp: try to avoid safer when ACKs are thinned
author    Ilpo Järvinen <ij@kernel.org>
          Sat, 31 Jan 2026 22:25:01 +0000 (23:25 +0100)
committer Paolo Abeni <pabeni@redhat.com>
          Tue, 3 Feb 2026 14:13:24 +0000 (15:13 +0100)
Add an EWMA of newly ACKed packets. When ACK thinning occurs,
select between the safer and unsafe cep delta in AccECN
processing based on it. If the number of packets ACKed per ACK
tends to be large, don't conservatively assume ACE field overflow.
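
For example, with the constants added below (PKTS_ACKED_WEIGHT =
PKTS_ACKED_PREC = 6), the average is stored with 6 fractional bits
and moves by 1/64 of the error on each ACK: a flow whose ACKs each
cover 8 packets settles at 8 << 6 = 512, above the 4-packet
comparison point (ACK_COMP_THRESH << PKTS_ACKED_PREC = 256), so the
unsafe delta is used.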

This patch uses the existing 4-byte hole in the tcp_sock_write_rx
group for the new u16 variable without creating more holes. Below
are the pahole outcomes before and after this patch (a note on
reproducing them follows the listings):

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 177 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    u16                        pkts_acked_ewma;        /*  2756     2 */
    /* XXX 2 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 178 */
}
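
The layouts above can be regenerated with pahole against a vmlinux
built with debug info, e.g. pahole -C tcp_sock vmlinux (path
illustrative): the 4-byte hole after delivered_ecn_bytes shrinks to
2 bytes once the new u16 is packed into it.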

Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-2-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Documentation/networking/net_cachelines/tcp_sock.rst
include/linux/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_input.c

index 26f32dbcf6ec9004e3be1bfefba8d303a60b1a99..563daea10d6c5c074f004cb1b8574f5392157abb 100644 (file)
@@ -105,6 +105,7 @@ u32                           received_ce             read_mostly         read_write
 u32[3]                        received_ecn_bytes      read_mostly         read_write
 u8:4                          received_ce_pending     read_mostly         read_write
 u32[3]                        delivered_ecn_bytes                         read_write
+u16                           pkts_acked_ewma                             read_write
 u8:2                          syn_ect_snt             write_mostly        read_write
 u8:2                          syn_ect_rcv             read_mostly         read_write
 u8:2                          accecn_minlen           write_mostly        read_write
index 367b491ddf978061d5a88ab02546ec99be1cca63..fbc514d582e7dc95743b93e846babf7dfe38ccc9 100644 (file)
@@ -342,6 +342,7 @@ struct tcp_sock {
        u32     rate_interval_us;  /* saved rate sample: time elapsed */
        u32     rcv_rtt_last_tsecr;
        u32     delivered_ecn_bytes[3];
+       u16     pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */
        u64     first_tx_mstamp;  /* start of window send phase */
        u64     delivered_mstamp; /* time we reached "delivered" */
        u64     bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked
index 6e94c5859f4b4ba1e74e5d04d903fd9717f6616c..49f1029b5f8b6754d78159071f639cb96b81d585 100644 (file)
@@ -3470,6 +3470,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        tcp_accecn_init_counters(tp);
        tp->prev_ecnfield = 0;
        tp->accecn_opt_tstamp = 0;
+       tp->pkts_acked_ewma = 0;
        if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
@@ -5243,6 +5244,7 @@ static void __init tcp_struct_check(void)
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_ecn_bytes);
+       CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, pkts_acked_ewma);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
index a2a872382fc003cb74ad048338cc4ae5d9a833e2..38852e04229af584499af1b6bdff01e10f23238e 100644 (file)
@@ -488,6 +488,10 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
                tcp_count_delivered_ce(tp, delivered);
 }
 
+#define PKTS_ACKED_WEIGHT      6
+#define PKTS_ACKED_PREC                6
+#define ACK_COMP_THRESH                4
+
 /* Returns the ECN CE delta */
 static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
                                u32 delivered_pkts, u32 delivered_bytes,
@@ -499,6 +503,7 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
        u32 delta, safe_delta, d_ceb;
        bool opt_deltas_valid;
        u32 corrected_ace;
+       u32 ewma;
 
        /* Reordered ACK or uncertain due to lack of data to send and ts */
        if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS)))
@@ -507,6 +512,18 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
        opt_deltas_valid = tcp_accecn_process_option(tp, skb,
                                                     delivered_bytes, flag);
 
+       if (delivered_pkts) {
+               if (!tp->pkts_acked_ewma) {
+                       ewma = delivered_pkts << PKTS_ACKED_PREC;
+               } else {
+                       ewma = tp->pkts_acked_ewma;
+                       ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) +
+                               (delivered_pkts << PKTS_ACKED_PREC)) >>
+                               PKTS_ACKED_WEIGHT;
+               }
+               tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU);
+       }
+
        if (!(flag & FLAG_SLOWPATH)) {
                /* AccECN counter might overflow on large ACKs */
                if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK)
@@ -555,7 +572,8 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
                if (d_ceb <
                    safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT)
                        return delta;
-       }
+       } else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC))
+               return delta;
 
        return safe_delta;
 }
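
As a worked example of the heuristic the last two hunks implement,
here is a minimal userspace sketch of the fixed-point arithmetic (not
part of the patch; pkts_acked_ewma_update() is a made-up helper name,
and the constants are copied from the hunk above):

    #include <stdint.h>
    #include <stdio.h>

    #define PKTS_ACKED_WEIGHT 6
    #define PKTS_ACKED_PREC   6
    #define ACK_COMP_THRESH   4

    static uint16_t pkts_acked_ewma_update(uint16_t ewma,
                                           uint32_t delivered_pkts)
    {
            uint32_t v;

            if (!delivered_pkts)
                    return ewma;    /* no sample on this ACK */
            if (!ewma) {
                    /* First sample seeds the average directly */
                    v = delivered_pkts << PKTS_ACKED_PREC;
            } else {
                    /* ewma' = (63 * ewma + 64 * delivered_pkts) / 64 */
                    v = ewma;
                    v = (((v << PKTS_ACKED_WEIGHT) - v) +
                         (delivered_pkts << PKTS_ACKED_PREC)) >>
                        PKTS_ACKED_WEIGHT;
            }
            return v > 0xFFFFU ? 0xFFFFU : v; /* clamp to fit u16 */
    }

    int main(void)
    {
            uint16_t ewma = 0;
            int i;

            /* Ten ACKs covering 8 packets each: ewma settles at
             * 8 << 6 = 512, above ACK_COMP_THRESH << PKTS_ACKED_PREC
             * (= 256), so the heuristic can skip the safer delta.
             */
            for (i = 0; i < 10; i++)
                    ewma = pkts_acked_ewma_update(ewma, 8);
            printf("ewma = %u (~%u pkts/ACK)\n", (unsigned int)ewma,
                   (unsigned int)(ewma >> PKTS_ACKED_PREC));
            return 0;
    }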