]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.19
authorSasha Levin <sashal@kernel.org>
Sat, 11 May 2024 23:37:43 +0000 (19:37 -0400)
committerSasha Levin <sashal@kernel.org>
Sat, 11 May 2024 23:37:43 +0000 (19:37 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch [new file with mode: 0644]
queue-4.19/af_unix-fix-garbage-collector-racing-against-connect.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch b/queue-4.19/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch
new file mode 100644 (file)
index 0000000..cafb642
--- /dev/null
@@ -0,0 +1,146 @@
+From d319cb5cfb4bf5e99ad8ed80e34c2d2330e2bbaf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 09:08:53 -0800
+Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ]
+
+When touching unix_sk(sk)->inflight, we are always under
+spin_lock(&unix_gc_lock).
+
+Let's convert unix_sk(sk)->inflight to the normal unsigned long.
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/af_unix.h |  2 +-
+ net/unix/af_unix.c    |  4 ++--
+ net/unix/garbage.c    | 17 ++++++++---------
+ net/unix/scm.c        |  8 +++++---
+ 4 files changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/include/net/af_unix.h b/include/net/af_unix.h
+index e514508bdc928..f22ab1a7b6bab 100644
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+       struct mutex            iolock, bindlock;
+       struct sock             *peer;
+       struct list_head        link;
+-      atomic_long_t           inflight;
++      unsigned long           inflight;
+       spinlock_t              lock;
+       unsigned long           gc_flags;
+ #define UNIX_GC_CANDIDATE     0
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 7910b9c88d8b7..921b7e355b9b9 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -814,11 +814,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+       sk->sk_write_space      = unix_write_space;
+       sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
+       sk->sk_destruct         = unix_sock_destructor;
+-      u         = unix_sk(sk);
++      u = unix_sk(sk);
++      u->inflight = 0;
+       u->path.dentry = NULL;
+       u->path.mnt = NULL;
+       spin_lock_init(&u->lock);
+-      atomic_long_set(&u->inflight, 0);
+       INIT_LIST_HEAD(&u->link);
+       mutex_init(&u->iolock); /* single task reading lock */
+       mutex_init(&u->bindlock); /* single task binding lock */
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 0a212422b513c..04dd9e80cbe01 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -171,17 +171,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
+ static void dec_inflight(struct unix_sock *usk)
+ {
+-      atomic_long_dec(&usk->inflight);
++      usk->inflight--;
+ }
+ static void inc_inflight(struct unix_sock *usk)
+ {
+-      atomic_long_inc(&usk->inflight);
++      usk->inflight++;
+ }
+ static void inc_inflight_move_tail(struct unix_sock *u)
+ {
+-      atomic_long_inc(&u->inflight);
++      u->inflight++;
++
+       /* If this still might be part of a cycle, move it to the end
+        * of the list, so that it's checked even if it was already
+        * passed over
+@@ -241,14 +242,12 @@ void unix_gc(void)
+        */
+       list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+               long total_refs;
+-              long inflight_refs;
+               total_refs = file_count(u->sk.sk_socket->file);
+-              inflight_refs = atomic_long_read(&u->inflight);
+-              BUG_ON(inflight_refs < 1);
+-              BUG_ON(total_refs < inflight_refs);
+-              if (total_refs == inflight_refs) {
++              BUG_ON(!u->inflight);
++              BUG_ON(total_refs < u->inflight);
++              if (total_refs == u->inflight) {
+                       list_move_tail(&u->link, &gc_candidates);
+                       __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+                       __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+@@ -275,7 +274,7 @@ void unix_gc(void)
+               /* Move cursor to after the current position. */
+               list_move(&cursor, &u->link);
+-              if (atomic_long_read(&u->inflight) > 0) {
++              if (u->inflight) {
+                       list_move_tail(&u->link, &not_cycle_list);
+                       __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+                       scan_children(&u->sk, inc_inflight_move_tail, NULL);
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index ac206bfdbbe3c..186c20826a14f 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -50,12 +50,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
+-              if (atomic_long_inc_return(&u->inflight) == 1) {
++              if (!u->inflight) {
+                       BUG_ON(!list_empty(&u->link));
+                       list_add_tail(&u->link, &gc_inflight_list);
+               } else {
+                       BUG_ON(list_empty(&u->link));
+               }
++              u->inflight++;
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+       }
+@@ -72,10 +73,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
+-              BUG_ON(!atomic_long_read(&u->inflight));
++              BUG_ON(!u->inflight);
+               BUG_ON(list_empty(&u->link));
+-              if (atomic_long_dec_and_test(&u->inflight))
++              u->inflight--;
++              if (!u->inflight)
+                       list_del_init(&u->link);
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+-- 
+2.43.0
+
diff --git a/queue-4.19/af_unix-fix-garbage-collector-racing-against-connect.patch b/queue-4.19/af_unix-fix-garbage-collector-racing-against-connect.patch
new file mode 100644 (file)
index 0000000..6460773
--- /dev/null
@@ -0,0 +1,122 @@
+From f7ca4e9d021afdd0a63cd047dded4e75bd2aedf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:09:39 +0200
+Subject: af_unix: Fix garbage collector racing against connect()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ]
+
+Garbage collector does not take into account the risk of embryo getting
+enqueued during the garbage collection. If such embryo has a peer that
+carries SCM_RIGHTS, two consecutive passes of scan_children() may see a
+different set of children. Leading to an incorrectly elevated inflight
+count, and then a dangling pointer within the gc_inflight_list.
+
+sockets are AF_UNIX/SOCK_STREAM
+S is an unconnected socket
+L is a listening in-flight socket bound to addr, not in fdtable
+V's fd will be passed via sendmsg(), gets inflight count bumped
+
+connect(S, addr)       sendmsg(S, [V]); close(V)       __unix_gc()
+----------------       -------------------------       -----------
+
+NS = unix_create1()
+skb1 = sock_wmalloc(NS)
+L = unix_find_other(addr)
+unix_state_lock(L)
+unix_peer(S) = NS
+                       // V count=1 inflight=0
+
+                       NS = unix_peer(S)
+                       skb2 = sock_alloc()
+                       skb_queue_tail(NS, skb2[V])
+
+                       // V became in-flight
+                       // V count=2 inflight=1
+
+                       close(V)
+
+                       // V count=1 inflight=1
+                       // GC candidate condition met
+
+                                               for u in gc_inflight_list:
+                                                 if (total_refs == inflight_refs)
+                                                   add u to gc_candidates
+
+                                               // gc_candidates={L, V}
+
+                                               for u in gc_candidates:
+                                                 scan_children(u, dec_inflight)
+
+                                               // embryo (skb1) was not
+                                               // reachable from L yet, so V's
+                                               // inflight remains unchanged
+__skb_queue_tail(L, skb1)
+unix_state_unlock(L)
+                                               for u in gc_candidates:
+                                                 if (u.inflight)
+                                                   scan_children(u, inc_inflight_move_tail)
+
+                                               // V count=1 inflight=2 (!)
+
+If there is a GC-candidate listening socket, lock/unlock its state. This
+makes GC wait until the end of any ongoing connect() to that socket. After
+flipping the lock, a possibly SCM-laden embryo is already enqueued. And if
+there is another embryo coming, it can not possibly carry SCM_RIGHTS. At
+this point, unix_inflight() can not happen because unix_gc_lock is already
+taken. Inflight graph remains unaffected.
+
+Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/garbage.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 04dd9e80cbe01..a3a49110fe06f 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -239,11 +239,22 @@ void unix_gc(void)
+        * receive queues.  Other, non candidate sockets _can_ be
+        * added to queue, so we must make sure only to touch
+        * candidates.
++       *
++       * Embryos, though never candidates themselves, affect which
++       * candidates are reachable by the garbage collector.  Before
++       * being added to a listener's queue, an embryo may already
++       * receive data carrying SCM_RIGHTS, potentially making the
++       * passed socket a candidate that is not yet reachable by the
++       * collector.  It becomes reachable once the embryo is
++       * enqueued.  Therefore, we must ensure that no SCM-laden
++       * embryo appears in a (candidate) listener's queue between
++       * consecutive scan_children() calls.
+        */
+       list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
++              struct sock *sk = &u->sk;
+               long total_refs;
+-              total_refs = file_count(u->sk.sk_socket->file);
++              total_refs = file_count(sk->sk_socket->file);
+               BUG_ON(!u->inflight);
+               BUG_ON(total_refs < u->inflight);
+@@ -251,6 +262,11 @@ void unix_gc(void)
+                       list_move_tail(&u->link, &gc_candidates);
+                       __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+                       __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
++
++                      if (sk->sk_state == TCP_LISTEN) {
++                              unix_state_lock(sk);
++                              unix_state_unlock(sk);
++                      }
+               }
+       }
+-- 
+2.43.0
+
index dc22b8b540043bbe331748708e4ac64bde68835d..405031620e31ab23c01f42a13ff243f9e019b2bf 100644 (file)
@@ -51,3 +51,5 @@ rtnetlink-correct-nested-ifla_vf_vlan_list-attribute.patch
 phonet-fix-rtm_phonet_notify-skb-allocation.patch
 net-bridge-fix-corrupted-ethernet-header-on-multicas.patch
 ipv6-fib6_rules-avoid-possible-null-dereference-in-f.patch
+af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch
+af_unix-fix-garbage-collector-racing-against-connect.patch