git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction
author: Eric Dumazet <edumazet@google.com>
Tue, 19 May 2026 08:46:11 +0000 (08:46 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Thu, 21 May 2026 02:14:06 +0000 (19:14 -0700)
Blamed commit moved the TIME_WAIT-derived ISN from the skb control
block to a per-CPU variable, assuming the value would always be consumed
by tcp_conn_request() for the same packet that wrote it. That assumption
is violated by multiple drop paths between the producer
(__this_cpu_write(tcp_tw_isn, isn) in tcp_v{4,6}_rcv()) and the consumer
(tcp_conn_request()):

 - min_ttl / min_hopcount check
 - xfrm policy check
 - tcp_inbound_hash() MD5/AO mismatch
 - tcp_filter() eBPF/SO_ATTACH_FILTER drop
 - th->syn && th->fin discard in tcp_rcv_state_process() TCP_LISTEN
 - psp_sk_rx_policy_check() in tcp_v{4,6}_do_rcv()
 - tcp_checksum_complete() in tcp_v{4,6}_do_rcv()
 - tcp_v{4,6}_cookie_check() returning NULL

When a packet is dropped on any of these paths, tcp_tw_isn is left set.

The next SYN processed on the same CPU then consumes the non-zero value in
tcp_conn_request(), receiving a potentially predictable ISN.

This patch moves tcp_tw_isn back to skb->cb[], getting rid of the per-cpu
variable.

Note that tcp_v{4,6}_fill_cb() do not set it.

Very little impact on overall code size/complexity:

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7)
Function                                     old     new   delta
tcp_v6_rcv                                  3038    3042      +4
tcp_v4_rcv                                  3035    3039      +4
tcp_conn_request                            2938    2923     -15
Total: Before=24436060, After=24436053, chg -0.00%

Fixes: 41eecbd712b7 ("tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260519084611.2485277-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv6/tcp_ipv6.c

index ecbadcb3a7446cb18c245e670ba49ff574dfaff7..98848db62894aa4453efa9db7ea425d0aab263da 100644 (file)
@@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void)
        this_cpu_dec(tcp_orphan_count);
 }
 
-DECLARE_PER_CPU(u32, tcp_tw_isn);
-
 void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -1102,10 +1100,13 @@ struct tcp_skb_cb {
        __u32           seq;            /* Starting sequence number     */
        __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
        union {
-               /* Note :
+               /* Notes :
+                *      tcp_tw_isn is used in input path only
+                *      (isn chosen by tcp_timewait_state_process())
                 *        tcp_gso_segs/size are used in write queue only,
                 *        cf tcp_skb_pcount()/tcp_skb_mss()
                 */
+               u32             tcp_tw_isn;
                struct {
                        u16     tcp_gso_segs;
                        u16     tcp_gso_size;
index 432fa28e47d4c8ef5d50339bfdf7da0ea8772b94..389a7cc17110daa5b3b490b3c339e53e212969f8 100644 (file)
@@ -299,9 +299,6 @@ enum {
 DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
 EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
 
-DEFINE_PER_CPU(u32, tcp_tw_isn);
-EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
-
 long sysctl_tcp_mem[3] __read_mostly;
 
 DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
index d5c9e65d97606d8eb57aba8ebc2373adf1bed62b..de9f68a9c0cf04109101b0d1bca20440376d4b05 100644 (file)
@@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                     struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_fastopen_cookie foc = { .len = -1 };
+       u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
        struct tcp_options_received tmp_opt;
        const struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
@@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        struct dst_entry *dst;
        struct flowi fl;
        u8 syncookies;
-       u32 isn;
 
 #ifdef CONFIG_TCP_AO
        const struct tcp_ao_hdr *aoh;
 #endif
 
-       isn = __this_cpu_read(tcp_tw_isn);
-       if (isn) {
-               /* TW buckets are converted to open requests without
-                * limitations, they conserve resources and peer is
-                * evidently real one.
-                */
-               __this_cpu_write(tcp_tw_isn, 0);
-       } else {
+       /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket.
+        * TW sockets are converted to open requests without limitations,
+        * we skip the queue limits and syncookie checks in the block below.
+        */
+       if (!isn) {
                syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
 
                if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
index c0526cc0398049fb34b5de20a1175d54942e80cd..fdc81150ff6cf938b1971c33b2b997e5d0d8fcaa 100644 (file)
@@ -2198,6 +2198,7 @@ lookup:
                }
        }
 
+       isn = 0;
 process:
        if (static_branch_unlikely(&ip4_min_ttl)) {
                /* min_ttl can be changed concurrently from do_ip_setsockopt() */
@@ -2227,6 +2228,7 @@ process:
        th = (const struct tcphdr *)skb->data;
        iph = ip_hdr(skb);
        tcp_v4_fill_cb(skb, iph, th);
+       TCP_SKB_CB(skb)->tcp_tw_isn = isn;
 
        skb->dev = NULL;
 
@@ -2313,7 +2315,6 @@ do_time_wait:
                        sk = sk2;
                        tcp_v4_restore_cb(skb);
                        refcounted = false;
-                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }
 
index d13d49bfef19457cc5902cb556605a80f4c0ab2c..36d75fb50a70b728fedb7c316e023758bd61d62c 100644 (file)
@@ -1839,6 +1839,7 @@ lookup:
                }
        }
 
+       isn = 0;
 process:
        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
@@ -1868,6 +1869,7 @@ process:
        th = (const struct tcphdr *)skb->data;
        hdr = ipv6_hdr(skb);
        tcp_v6_fill_cb(skb, hdr, th);
+       TCP_SKB_CB(skb)->tcp_tw_isn = isn;
 
        skb->dev = NULL;
 
@@ -1956,7 +1958,6 @@ do_time_wait:
                        sk = sk2;
                        tcp_v6_restore_cb(skb);
                        refcounted = false;
-                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }