]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tcp: drop secpath at the same time as we currently drop dst
author Sabrina Dubroca <sd@queasysnail.net>
Mon, 17 Feb 2025 10:23:35 +0000 (11:23 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Feb 2025 12:10:49 +0000 (04:10 -0800)
[ Upstream commit 9b6412e6979f6f9e0632075f8f008937b5cd4efd ]

Xiumei reported hitting the WARN in xfrm6_tunnel_net_exit while
running tests that boil down to:
 - create a pair of netns
 - run a basic TCP test over ipcomp6
 - delete the pair of netns

The xfrm_state found on spi_byaddr was not deleted at the time we
delete the netns, because we still have a reference on it. This
lingering reference comes from a secpath (which holds a ref on the
xfrm_state), which is still attached to an skb. This skb is not
leaked, it ends up on sk_receive_queue and then gets defer-free'd by
skb_attempt_defer_free.

The problem happens when we defer freeing an skb (push it on one CPU's
defer_list), and don't flush that list before the netns is deleted. In
that case, we still have a reference on the xfrm_state that we don't
expect at this point.

We already drop the skb's dst in the TCP receive path when it's no
longer needed, so let's also drop the secpath. At this point,
tcp_filter has already called into the LSM hooks that may require the
secpath, so it should not be needed anymore. However, in some of those
places, the MPTCP extension has just been attached to the skb, so we
cannot simply drop all extensions.

Fixes: 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/5055ba8f8f72bdcb602faa299faca73c280b7735.1739743613.git.sd@queasysnail.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/net/tcp.h
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c

index b3917af309e0f1c16423f70378cad5cda6bb08b3..78c755414fa879936b0117aea172dd81c09d3f79 100644 (file)
@@ -40,6 +40,7 @@
 #include <net/inet_ecn.h>
 #include <net/dst.h>
 #include <net/mptcp.h>
+#include <net/xfrm.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -630,6 +631,19 @@ void tcp_fin(struct sock *sk);
 void tcp_check_space(struct sock *sk);
 void tcp_sack_compress_send_ack(struct sock *sk);
 
+static inline void tcp_cleanup_skb(struct sk_buff *skb)
+{
+       skb_dst_drop(skb);
+       secpath_reset(skb);
+}
+
+static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb)
+{
+       DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+       DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb));
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+}
+
 /* tcp_timer.c */
 void tcp_init_xmit_timers(struct sock *);
 static inline void tcp_clear_xmit_timers(struct sock *sk)
index 0f523cbfe329efeaee2ef206b0779e9911ef22cd..32b28fc21b63c0d1db3b457dda6e4afe86af0f44 100644 (file)
@@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
        if (!skb)
                return;
 
-       skb_dst_drop(skb);
+       tcp_cleanup_skb(skb);
        /* segs_in has been initialized to 1 in tcp_create_openreq_child().
         * Hence, reset segs_in to 0 before calling tcp_segs_in()
         * to avoid double counting.  Also, tcp_segs_in() expects
@@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
        TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
 
        tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-       __skb_queue_tail(&sk->sk_receive_queue, skb);
+       tcp_add_receive_queue(sk, skb);
        tp->syn_data_acked = 1;
 
        /* u64_stats_update_begin(&tp->syncp) not needed here,
index 6074b4c3ab94036bc92ff5298d51b573a5783ca1..10d38ec0ff5acd5e5cb49d3875b1dbd105adeae1 100644 (file)
@@ -4874,7 +4874,7 @@ static void tcp_ofo_queue(struct sock *sk)
                tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                if (!eaten)
-                       __skb_queue_tail(&sk->sk_receive_queue, skb);
+                       tcp_add_receive_queue(sk, skb);
                else
                        kfree_skb_partial(skb, fragstolen);
 
@@ -5065,7 +5065,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
                                  skb, fragstolen)) ? 1 : 0;
        tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
-               __skb_queue_tail(&sk->sk_receive_queue, skb);
+               tcp_add_receive_queue(sk, skb);
                skb_set_owner_r(skb, sk);
        }
        return eaten;
@@ -5148,7 +5148,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
                __kfree_skb(skb);
                return;
        }
-       skb_dst_drop(skb);
+       tcp_cleanup_skb(skb);
        __skb_pull(skb, tcp_hdr(skb)->doff * 4);
 
        reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -6098,7 +6098,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
                        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
                        /* Bulk data transfer: receiver */
-                       skb_dst_drop(skb);
+                       tcp_cleanup_skb(skb);
                        __skb_pull(skb, tcp_header_len);
                        eaten = tcp_queue_rcv(sk, skb, &fragstolen);
 
index 705320f160ac86a340474fa464b51bd30e4f48b4..2f49a504c9d3e3cd307b04b88bf234a65d0f9bf8 100644 (file)
@@ -1842,7 +1842,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
         */
        skb_condense(skb);
 
-       skb_dst_drop(skb);
+       tcp_cleanup_skb(skb);
 
        if (unlikely(tcp_checksum_complete(skb))) {
                bh_unlock_sock(sk);