git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
net: better track kernel sockets lifetime
author: Eric Dumazet <edumazet@google.com>
Tue, 12 Aug 2025 18:40:16 +0000 (14:40 -0400)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Aug 2025 14:28:39 +0000 (16:28 +0200)
[ Upstream commit 5c70eb5c593d64d93b178905da215a9fd288a4b5 ]

While kernel sockets are dismantled during pernet_operations->exit(),
their freeing can be delayed by any tx packets still held in qdisc
or device queues, due to prior skb_set_owner_w() calls.

This then triggers the following warning from ref_tracker_dir_exit() [1].

To fix this, make sure that kernel sockets own a reference on net->passive.

Add sk_net_refcnt_upgrade() helper, used whenever a kernel socket
is converted to a refcounted one.

[1]

[  136.263918][   T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[  136.263918][   T35]      sk_alloc+0x2b3/0x370
[  136.263918][   T35]      inet6_create+0x6ce/0x10f0
[  136.263918][   T35]      __sock_create+0x4c0/0xa30
[  136.263918][   T35]      inet_ctl_sock_create+0xc2/0x250
[  136.263918][   T35]      igmp6_net_init+0x39/0x390
[  136.263918][   T35]      ops_init+0x31e/0x590
[  136.263918][   T35]      setup_net+0x287/0x9e0
[  136.263918][   T35]      copy_net_ns+0x33f/0x570
[  136.263918][   T35]      create_new_namespaces+0x425/0x7b0
[  136.263918][   T35]      unshare_nsproxy_namespaces+0x124/0x180
[  136.263918][   T35]      ksys_unshare+0x57d/0xa70
[  136.263918][   T35]      __x64_sys_unshare+0x38/0x40
[  136.263918][   T35]      do_syscall_64+0xf3/0x230
[  136.263918][   T35]      entry_SYSCALL_64_after_hwframe+0x77/0x7f
[  136.263918][   T35]
[  136.343488][   T35] ref_tracker: net notrefcnt@ffff8880638f01e0 has 1/2 users at
[  136.343488][   T35]      sk_alloc+0x2b3/0x370
[  136.343488][   T35]      inet6_create+0x6ce/0x10f0
[  136.343488][   T35]      __sock_create+0x4c0/0xa30
[  136.343488][   T35]      inet_ctl_sock_create+0xc2/0x250
[  136.343488][   T35]      ndisc_net_init+0xa7/0x2b0
[  136.343488][   T35]      ops_init+0x31e/0x590
[  136.343488][   T35]      setup_net+0x287/0x9e0
[  136.343488][   T35]      copy_net_ns+0x33f/0x570
[  136.343488][   T35]      create_new_namespaces+0x425/0x7b0
[  136.343488][   T35]      unshare_nsproxy_namespaces+0x124/0x180
[  136.343488][   T35]      ksys_unshare+0x57d/0xa70
[  136.343488][   T35]      __x64_sys_unshare+0x38/0x40
[  136.343488][   T35]      do_syscall_64+0xf3/0x230
[  136.343488][   T35]      entry_SYSCALL_64_after_hwframe+0x77/0x7f

Fixes: 0cafd77dcd03 ("net: add a refcount tracker for kernel sockets")
Reported-by: syzbot+30a19e01a97420719891@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/67b72aeb.050a0220.14d86d.0283.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250220131854.4048077-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/net/sock.h
net/core/sock.c
net/mptcp/subflow.c
net/netlink/af_netlink.c
net/rds/tcp.c
net/smc/af_smc.c
net/sunrpc/svcsock.c
net/sunrpc/xprtsock.c

index e15bea43b2ecd152fc294b9b8a6f726ee30bedf6..b5f7208a9ec38359b769ff63e69ac460edb97e9b 100644 (file)
@@ -1859,6 +1859,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk)
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                      struct proto *prot, int kern);
 void sk_free(struct sock *sk);
+void sk_net_refcnt_upgrade(struct sock *sk);
 void sk_destruct(struct sock *sk);
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
 void sk_free_unlock_clone(struct sock *sk);
index ec48690b5174ebfbc708e63df8167e95fb177cf9..b74bc8175937e21933ef1dc50b8131073657f69d 100644 (file)
@@ -2159,6 +2159,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                        get_net_track(net, &sk->ns_tracker, priority);
                        sock_inuse_add(net, 1);
                } else {
+                       net_passive_inc(net);
                        __netns_tracker_alloc(net, &sk->ns_tracker,
                                              false, priority);
                }
@@ -2183,6 +2184,7 @@ EXPORT_SYMBOL(sk_alloc);
 static void __sk_destruct(struct rcu_head *head)
 {
        struct sock *sk = container_of(head, struct sock, sk_rcu);
+       struct net *net = sock_net(sk);
        struct sk_filter *filter;
 
        if (sk->sk_destruct)
@@ -2214,14 +2216,28 @@ static void __sk_destruct(struct rcu_head *head)
        put_cred(sk->sk_peer_cred);
        put_pid(sk->sk_peer_pid);
 
-       if (likely(sk->sk_net_refcnt))
-               put_net_track(sock_net(sk), &sk->ns_tracker);
-       else
-               __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
-
+       if (likely(sk->sk_net_refcnt)) {
+               put_net_track(net, &sk->ns_tracker);
+       } else {
+               __netns_tracker_free(net, &sk->ns_tracker, false);
+               net_passive_dec(net);
+       }
        sk_prot_free(sk->sk_prot_creator, sk);
 }
 
+void sk_net_refcnt_upgrade(struct sock *sk)
+{
+       struct net *net = sock_net(sk);
+
+       WARN_ON_ONCE(sk->sk_net_refcnt);
+       __netns_tracker_free(net, &sk->ns_tracker, false);
+       net_passive_dec(net);
+       sk->sk_net_refcnt = 1;
+       get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+       sock_inuse_add(net, 1);
+}
+EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);
+
 void sk_destruct(struct sock *sk)
 {
        bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
@@ -2313,6 +2329,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                 * is not properly dismantling its kernel sockets at netns
                 * destroy time.
                 */
+               net_passive_inc(sock_net(newsk));
                __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
                                      false, priority);
        }
index a01ea18283c726de79e78abae67fccc108dff4fe..0c9b9c0c277c237a435dff850f33294ff54d9f58 100644 (file)
@@ -1715,10 +1715,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
         * needs it.
         * Update ns_tracker to current stack trace and refcounted tracker.
         */
-       __netns_tracker_free(net, &sf->sk->ns_tracker, false);
-       sf->sk->sk_net_refcnt = 1;
-       get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
-       sock_inuse_add(net, 1);
+       sk_net_refcnt_upgrade(sf->sk);
        err = tcp_set_ulp(sf->sk, "mptcp");
 
 release_ssk:
index 53c9c56e6f25742db6df9219e2265f3b12f13ab4..a5ffda87daf63bc6ffad62e15bb947773f1de140 100644 (file)
@@ -798,16 +798,6 @@ static int netlink_release(struct socket *sock)
 
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
 
-       /* Because struct net might disappear soon, do not keep a pointer. */
-       if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
-               __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
-               /* Because of deferred_put_nlk_sk and use of work queue,
-                * it is possible  netns will be freed before this socket.
-                */
-               sock_net_set(sk, &init_net);
-               __netns_tracker_alloc(&init_net, &sk->ns_tracker,
-                                     false, GFP_KERNEL);
-       }
        call_rcu(&nlk->rcu, deferred_put_nlk_sk);
        return 0;
 }
index 2dba7505b41489d10d5a48ccb97e619eae1ff6d1..985b05f38b6746ca5c0cd07f8545913fea69ecfb 100644 (file)
@@ -503,12 +503,8 @@ bool rds_tcp_tune(struct socket *sock)
                        release_sock(sk);
                        return false;
                }
-               /* Update ns_tracker to current stack trace and refcounted tracker */
-               __netns_tracker_free(net, &sk->ns_tracker, false);
-
-               sk->sk_net_refcnt = 1;
-               netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
-               sock_inuse_add(net, 1);
+               sk_net_refcnt_upgrade(sk);
+               put_net(net);
        }
        rtn = net_generic(net, rds_tcp_netid);
        if (rtn->sndbuf_size > 0) {
index 45efbbfff94ae28b9457eb9c595a623c3ffdde64..8f75bb9d165aaeebe2215aa67fc9148e650e7ba5 100644 (file)
@@ -3343,10 +3343,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family)
         * which need net ref.
         */
        sk = smc->clcsock->sk;
-       __netns_tracker_free(net, &sk->ns_tracker, false);
-       sk->sk_net_refcnt = 1;
-       get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
-       sock_inuse_add(net, 1);
+       sk_net_refcnt_upgrade(sk);
        return 0;
 }
 
index 7229b4a9ad1dcf29ef81e89555b9b87620217567..78b139d8c1f3b9754e53579b51d70e8658411ed0 100644 (file)
@@ -1579,10 +1579,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
        newlen = error;
 
        if (protocol == IPPROTO_TCP) {
-               __netns_tracker_free(net, &sock->sk->ns_tracker, false);
-               sock->sk->sk_net_refcnt = 1;
-               get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
-               sock_inuse_add(net, 1);
+               sk_net_refcnt_upgrade(sock->sk);
                if ((error = kernel_listen(sock, 64)) < 0)
                        goto bummer;
        }
index 99bb3e762af46f0ca77276b779b21f31a5e44081..8b27a21f3b42d86a3c7ede571605658c59f8bf22 100644 (file)
@@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
                goto out;
        }
 
-       if (protocol == IPPROTO_TCP) {
-               __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
-               sock->sk->sk_net_refcnt = 1;
-               get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
-               sock_inuse_add(xprt->xprt_net, 1);
-       }
+       if (protocol == IPPROTO_TCP)
+               sk_net_refcnt_upgrade(sock->sk);
 
        filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
        if (IS_ERR(filp))