git.ipfire.org Git - thirdparty/linux.git/commitdiff
rds: filter RDS_INFO_* getsockopt by caller's netns
author Maoyi Xie <maoyixie.tju@gmail.com>
Wed, 20 May 2026 08:42:36 +0000 (16:42 +0800)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 25 May 2026 19:18:42 +0000 (12:18 -0700)
The RDS_INFO_* family of getsockopt(2) options reads several
file-scope global lists that are not per-netns:

  rds_sock_info / rds6_sock_info,
  rds_sock_inc_info / rds6_sock_inc_info        -> rds_sock_list
  rds_tcp_tc_info / rds6_tcp_tc_info            -> rds_tcp_tc_list
  rds_conn_info / rds6_conn_info,
  rds_conn_message_info_cmn (for the *_SEND_MESSAGES and
  *_RETRANS_MESSAGES variants),
  rds_for_each_conn_info (for RDS_INFO_IB_CONNECTIONS)
                                                -> rds_conn_hash[]

The handlers do not filter by the caller's network namespace.
rds_info_getsockopt() has no netns or capable() check, and
rds_create() has no capable() check, so AF_RDS is reachable from
an unprivileged user namespace. As a result, an unprivileged
caller in a fresh user_ns plus netns can read the bound address
and sock inode of every RDS socket on the host, the peer address
of incoming messages on every RDS socket on the host, the peer
address and TCP sequence numbers of every rds-tcp connection on
the host, and the peer address and RDS sequence numbers of every
RDS connection on the host.

The rds-tcp transport is reachable from a non-initial netns (see
rds_set_transport()), so a one-shot init_net gate at
rds_info_getsockopt() would deny legitimate per-netns visibility
to rds-tcp callers. Instead, filter at each handler by comparing
the netns of the caller's socket to the netns of the list entry,
or to rds_conn_net(conn) for connection paths. Only copy entries
whose netns matches the caller. Counters (RDS_INFO_COUNTERS) are
aggregate statistics and remain global.

Reproducer (KASAN VM, rds and rds_tcp loaded): an AF_RDS socket
binds 127.0.0.1:4242 in init_net as root. A child process enters
a fresh user_ns plus netns and opens AF_RDS there, then calls
getsockopt(SOL_RDS, RDS_INFO_SOCKETS). Before this change, the
child sees the init_net socket. After this change, the child
sees zero entries.

Drop the rds_sock_count, rds_tcp_tc_count, and rds6_tcp_tc_count
globals. v2 used them for the size precheck and lens->nr; v3
replaced the precheck with a per-ns count from a first pass over
the list, so the globals have no remaining readers. The matching
increments and decrements in __rds_create()/rds_release() and
rds_tcp_set_callbacks()/rds_tcp_restore_callbacks() go away with
them. Reported by the kernel test robot under clang W=1.

Suggested-by: Allison Henderson <achender@kernel.org>
Suggested-by: Simon Horman <horms@kernel.org>
Reviewed-by: Allison Henderson <achender@kernel.org>
Co-developed-by: Praveen Kakkolangara <praveen.kakkolangara@aumovio.com>
Signed-off-by: Praveen Kakkolangara <praveen.kakkolangara@aumovio.com>
Signed-off-by: Maoyi Xie <maoyixie.tju@gmail.com>
Link: https://patch.msgid.link/20260520084236.2724349-1-maoyixie.tju@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/rds/af_rds.c
net/rds/connection.c
net/rds/tcp.c

index 93b2da63ed42ec1b669cd11532e455f6777d347b..6f4f9cf352bdd55d79b6773e36af09e52f7282f0 100644 (file)
@@ -43,7 +43,6 @@
 
 /* this is just used for stats gathering :/ */
 static DEFINE_SPINLOCK(rds_sock_lock);
-static unsigned long rds_sock_count;
 static LIST_HEAD(rds_sock_list);
 DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
 
@@ -82,7 +81,6 @@ static int rds_release(struct socket *sock)
 
        spin_lock_bh(&rds_sock_lock);
        list_del_init(&rs->rs_item);
-       rds_sock_count--;
        spin_unlock_bh(&rds_sock_lock);
 
        rds_trans_put(rs->rs_transport);
@@ -694,7 +692,6 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 
        spin_lock_bh(&rds_sock_lock);
        list_add_tail(&rs->rs_item, &rds_sock_list);
-       rds_sock_count++;
        spin_unlock_bh(&rds_sock_lock);
 
        return 0;
@@ -735,6 +732,7 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
                              struct rds_info_iterator *iter,
                              struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(sock->sk);
        struct rds_sock *rs;
        struct rds_incoming *inc;
        unsigned int total = 0;
@@ -744,6 +742,9 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
        spin_lock_bh(&rds_sock_lock);
 
        list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               /* Only show sockets in the caller's netns. */
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
                /* This option only supports IPv4 sockets. */
                if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
                        continue;
@@ -774,6 +775,7 @@ static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
                               struct rds_info_iterator *iter,
                               struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(sock->sk);
        struct rds_incoming *inc;
        unsigned int total = 0;
        struct rds_sock *rs;
@@ -783,6 +785,9 @@ static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
        spin_lock_bh(&rds_sock_lock);
 
        list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               /* Only show sockets in the caller's netns. */
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
                read_lock(&rs->rs_recv_lock);
 
                list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
@@ -806,7 +811,9 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(sock->sk);
        struct rds_info_socket sinfo;
+       unsigned int copied = 0;
        unsigned int cnt = 0;
        struct rds_sock *rs;
 
@@ -814,12 +821,24 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
 
        spin_lock_bh(&rds_sock_lock);
 
-       if (len < rds_sock_count) {
-               cnt = rds_sock_count;
-               goto out;
+       /* First pass: count entries visible in the caller's netns. */
+       list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
+               if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+                       continue;
+               cnt++;
        }
 
+       if (len < cnt)
+               goto out;
+
        list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               if (copied >= cnt)
+                       break;
+               /* Only show sockets in the caller's netns. */
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
                /* This option only supports IPv4 sockets. */
                if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
                        continue;
@@ -832,8 +851,13 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
                sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
 
                rds_info_copy(iter, &sinfo, sizeof(sinfo));
-               cnt++;
+               copied++;
        }
+       /* A concurrent rds_bind() can change rs_bound_addr between the
+        * two passes without holding rds_sock_lock, so copied may be
+        * less than cnt. Report what was actually copied.
+        */
+       cnt = copied;
 
 out:
        lens->nr = cnt;
@@ -847,17 +871,32 @@ static void rds6_sock_info(struct socket *sock, unsigned int len,
                           struct rds_info_iterator *iter,
                           struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(sock->sk);
        struct rds6_info_socket sinfo6;
+       unsigned int copied = 0;
+       unsigned int cnt = 0;
        struct rds_sock *rs;
 
        len /= sizeof(struct rds6_info_socket);
 
        spin_lock_bh(&rds_sock_lock);
 
-       if (len < rds_sock_count)
+       /* First pass: count entries visible in the caller's netns. */
+       list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
+               cnt++;
+       }
+
+       if (len < cnt)
                goto out;
 
        list_for_each_entry(rs, &rds_sock_list, rs_item) {
+               if (copied >= cnt)
+                       break;
+               /* Only show sockets in the caller's netns. */
+               if (!net_eq(sock_net(rds_rs_to_sk(rs)), net))
+                       continue;
                sinfo6.sndbuf = rds_sk_sndbuf(rs);
                sinfo6.rcvbuf = rds_sk_rcvbuf(rs);
                sinfo6.bound_addr = rs->rs_bound_addr;
@@ -867,10 +906,12 @@ static void rds6_sock_info(struct socket *sock, unsigned int len,
                sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs));
 
                rds_info_copy(iter, &sinfo6, sizeof(sinfo6));
+               copied++;
        }
+       cnt = copied;
 
  out:
-       lens->nr = rds_sock_count;
+       lens->nr = cnt;
        lens->each = sizeof(struct rds6_info_socket);
 
        spin_unlock_bh(&rds_sock_lock);
index c10b7ed06c49f8f9caf5a1447c3ade9728c965f8..7c8ab8e973e1bc09211a7a97fa33ea900a819770 100644 (file)
@@ -568,6 +568,7 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
                                      struct rds_info_lengths *lens,
                                      int want_send, bool isv6)
 {
+       struct net *net = sock_net(sock->sk);
        struct hlist_head *head;
        struct list_head *list;
        struct rds_connection *conn;
@@ -590,6 +591,9 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
                        struct rds_conn_path *cp;
                        int npaths;
 
+                       /* Only show connections in the caller's netns. */
+                       if (!net_eq(rds_conn_net(conn), net))
+                               continue;
                        if (!isv6 && conn->c_isv6)
                                continue;
 
@@ -688,6 +692,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
                          u64 *buffer,
                          size_t item_len)
 {
+       struct net *net = sock_net(sock->sk);
        struct hlist_head *head;
        struct rds_connection *conn;
        size_t i;
@@ -700,6 +705,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
        for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
             i++, head++) {
                hlist_for_each_entry_rcu(conn, head, c_hash_node) {
+                       /* Only show connections in the caller's netns. */
+                       if (!net_eq(rds_conn_net(conn), net))
+                               continue;
 
                        /* Zero the per-item buffer before handing it to the
                         * visitor so any field the visitor does not write -
@@ -733,6 +741,7 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
                                    u64 *buffer,
                                    size_t item_len)
 {
+       struct net *net = sock_net(sock->sk);
        struct hlist_head *head;
        struct rds_connection *conn;
        size_t i;
@@ -747,6 +756,10 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
                hlist_for_each_entry_rcu(conn, head, c_hash_node) {
                        struct rds_conn_path *cp;
 
+                       /* Only show connections in the caller's netns. */
+                       if (!net_eq(rds_conn_net(conn), net))
+                               continue;
+
                        /* XXX We only copy the information from the first
                         * path for now.  The problem is that if there are
                         * more than one underlying paths, we cannot report
index 5830b31a1f37b3e380a58eab043ea3162d18fae5..a1de114d5e2e01aa3edbc8fc1a66ecf095582a6f 100644 (file)
 static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
 static LIST_HEAD(rds_tcp_tc_list);
 
-/* rds_tcp_tc_count counts only IPv4 connections.
- * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
- */
-static unsigned int rds_tcp_tc_count;
-#if IS_ENABLED(CONFIG_IPV6)
-static unsigned int rds6_tcp_tc_count;
-#endif
-
 /* Track rds_tcp_connection structs so they can be cleaned up */
 static DEFINE_SPINLOCK(rds_tcp_conn_lock);
 static LIST_HEAD(rds_tcp_conn_list);
@@ -110,11 +102,6 @@ void rds_tcp_restore_callbacks(struct socket *sock,
        /* done under the callback_lock to serialize with write_space */
        spin_lock(&rds_tcp_tc_list_lock);
        list_del_init(&tc->t_list_item);
-#if IS_ENABLED(CONFIG_IPV6)
-       rds6_tcp_tc_count--;
-#endif
-       if (!tc->t_cpath->cp_conn->c_isv6)
-               rds_tcp_tc_count--;
        spin_unlock(&rds_tcp_tc_list_lock);
 
        tc->t_sock = NULL;
@@ -206,11 +193,6 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
        spin_lock(&rds_tcp_tc_list_lock);
        tc->t_sock = sock;
        list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
-#if IS_ENABLED(CONFIG_IPV6)
-       rds6_tcp_tc_count++;
-#endif
-       if (!tc->t_cpath->cp_conn->c_isv6)
-               rds_tcp_tc_count++;
        spin_unlock(&rds_tcp_tc_list_lock);
 
        /* accepted sockets need our listen data ready undone */
@@ -238,20 +220,37 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
                            struct rds_info_iterator *iter,
                            struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(rds_sock->sk);
        struct rds_info_tcp_socket tsinfo;
        struct rds_tcp_connection *tc;
+       unsigned int copied = 0;
+       unsigned int cnt = 0;
        unsigned long flags;
 
        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 
-       if (len / sizeof(tsinfo) < rds_tcp_tc_count)
+       /* First pass: count entries visible in the caller's netns. */
+       list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+               if (tc->t_cpath->cp_conn->c_isv6)
+                       continue;
+               if (!net_eq(rds_conn_net(tc->t_cpath->cp_conn), net))
+                       continue;
+               cnt++;
+       }
+
+       if (len / sizeof(tsinfo) < cnt)
                goto out;
 
        list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
                struct inet_sock *inet = inet_sk(tc->t_sock->sk);
 
+               if (copied >= cnt)
+                       break;
                if (tc->t_cpath->cp_conn->c_isv6)
                        continue;
+               /* Only show connections in the caller's netns. */
+               if (!net_eq(rds_conn_net(tc->t_cpath->cp_conn), net))
+                       continue;
 
                tsinfo.local_addr = inet->inet_saddr;
                tsinfo.local_port = inet->inet_sport;
@@ -266,10 +265,12 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
                tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
 
                rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
+               copied++;
        }
+       cnt = copied;
 
 out:
-       lens->nr = rds_tcp_tc_count;
+       lens->nr = cnt;
        lens->each = sizeof(tsinfo);
 
        spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
@@ -284,19 +285,35 @@ static void rds6_tcp_tc_info(struct socket *sock, unsigned int len,
                             struct rds_info_iterator *iter,
                             struct rds_info_lengths *lens)
 {
+       struct net *net = sock_net(sock->sk);
        struct rds6_info_tcp_socket tsinfo6;
        struct rds_tcp_connection *tc;
+       unsigned int copied = 0;
+       unsigned int cnt = 0;
        unsigned long flags;
 
        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 
-       if (len / sizeof(tsinfo6) < rds6_tcp_tc_count)
+       /* First pass: count entries visible in the caller's netns. */
+       list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+               if (!net_eq(rds_conn_net(tc->t_cpath->cp_conn), net))
+                       continue;
+               cnt++;
+       }
+
+       if (len / sizeof(tsinfo6) < cnt)
                goto out;
 
        list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
                struct sock *sk = tc->t_sock->sk;
                struct inet_sock *inet = inet_sk(sk);
 
+               if (copied >= cnt)
+                       break;
+               /* Only show connections in the caller's netns. */
+               if (!net_eq(rds_conn_net(tc->t_cpath->cp_conn), net))
+                       continue;
+
                tsinfo6.local_addr = sk->sk_v6_rcv_saddr;
                tsinfo6.local_port = inet->inet_sport;
                tsinfo6.peer_addr = sk->sk_v6_daddr;
@@ -309,10 +326,12 @@ static void rds6_tcp_tc_info(struct socket *sock, unsigned int len,
                tsinfo6.last_seen_una = tc->t_last_seen_una;
 
                rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6));
+               copied++;
        }
+       cnt = copied;
 
 out:
-       lens->nr = rds6_tcp_tc_count;
+       lens->nr = cnt;
        lens->each = sizeof(tsinfo6);
 
        spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);