--- /dev/null
+From d319cb5cfb4bf5e99ad8ed80e34c2d2330e2bbaf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 09:08:53 -0800
+Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ]
+
+When touching unix_sk(sk)->inflight, we are always under
+spin_lock(&unix_gc_lock).
+
+Let's convert unix_sk(sk)->inflight to the normal unsigned long.
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/af_unix.h | 2 +-
+ net/unix/af_unix.c | 4 ++--
+ net/unix/garbage.c | 17 ++++++++---------
+ net/unix/scm.c | 8 +++++---
+ 4 files changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/include/net/af_unix.h b/include/net/af_unix.h
+index e514508bdc928..f22ab1a7b6bab 100644
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+ struct mutex iolock, bindlock;
+ struct sock *peer;
+ struct list_head link;
+- atomic_long_t inflight;
++ unsigned long inflight;
+ spinlock_t lock;
+ unsigned long gc_flags;
+ #define UNIX_GC_CANDIDATE 0
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 7910b9c88d8b7..921b7e355b9b9 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -814,11 +814,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+ sk->sk_write_space = unix_write_space;
+ sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
+ sk->sk_destruct = unix_sock_destructor;
+- u = unix_sk(sk);
++ u = unix_sk(sk);
++ u->inflight = 0;
+ u->path.dentry = NULL;
+ u->path.mnt = NULL;
+ spin_lock_init(&u->lock);
+- atomic_long_set(&u->inflight, 0);
+ INIT_LIST_HEAD(&u->link);
+ mutex_init(&u->iolock); /* single task reading lock */
+ mutex_init(&u->bindlock); /* single task binding lock */
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 0a212422b513c..04dd9e80cbe01 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -171,17 +171,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
+
+ static void dec_inflight(struct unix_sock *usk)
+ {
+- atomic_long_dec(&usk->inflight);
++ usk->inflight--;
+ }
+
+ static void inc_inflight(struct unix_sock *usk)
+ {
+- atomic_long_inc(&usk->inflight);
++ usk->inflight++;
+ }
+
+ static void inc_inflight_move_tail(struct unix_sock *u)
+ {
+- atomic_long_inc(&u->inflight);
++ u->inflight++;
++
+ /* If this still might be part of a cycle, move it to the end
+ * of the list, so that it's checked even if it was already
+ * passed over
+@@ -241,14 +242,12 @@ void unix_gc(void)
+ */
+ list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ long total_refs;
+- long inflight_refs;
+
+ total_refs = file_count(u->sk.sk_socket->file);
+- inflight_refs = atomic_long_read(&u->inflight);
+
+- BUG_ON(inflight_refs < 1);
+- BUG_ON(total_refs < inflight_refs);
+- if (total_refs == inflight_refs) {
++ BUG_ON(!u->inflight);
++ BUG_ON(total_refs < u->inflight);
++ if (total_refs == u->inflight) {
+ list_move_tail(&u->link, &gc_candidates);
+ __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+ __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+@@ -275,7 +274,7 @@ void unix_gc(void)
+ /* Move cursor to after the current position. */
+ list_move(&cursor, &u->link);
+
+- if (atomic_long_read(&u->inflight) > 0) {
++ if (u->inflight) {
+ list_move_tail(&u->link, ¬_cycle_list);
+ __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+ scan_children(&u->sk, inc_inflight_move_tail, NULL);
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index ac206bfdbbe3c..186c20826a14f 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -50,12 +50,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+- if (atomic_long_inc_return(&u->inflight) == 1) {
++ if (!u->inflight) {
+ BUG_ON(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ BUG_ON(list_empty(&u->link));
+ }
++ u->inflight++;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+@@ -72,10 +73,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+- BUG_ON(!atomic_long_read(&u->inflight));
++ BUG_ON(!u->inflight);
+ BUG_ON(list_empty(&u->link));
+
+- if (atomic_long_dec_and_test(&u->inflight))
++ u->inflight--;
++ if (!u->inflight)
+ list_del_init(&u->link);
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+--
+2.43.0
+
--- /dev/null
+From f7ca4e9d021afdd0a63cd047dded4e75bd2aedf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:09:39 +0200
+Subject: af_unix: Fix garbage collector racing against connect()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ]
+
+Garbage collector does not take into account the risk of embryo getting
+enqueued during the garbage collection. If such embryo has a peer that
+carries SCM_RIGHTS, two consecutive passes of scan_children() may see a
+different set of children. Leading to an incorrectly elevated inflight
+count, and then a dangling pointer within the gc_inflight_list.
+
+sockets are AF_UNIX/SOCK_STREAM
+S is an unconnected socket
+L is a listening in-flight socket bound to addr, not in fdtable
+V's fd will be passed via sendmsg(), gets inflight count bumped
+
+connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc()
+---------------- ------------------------- -----------
+
+NS = unix_create1()
+skb1 = sock_wmalloc(NS)
+L = unix_find_other(addr)
+unix_state_lock(L)
+unix_peer(S) = NS
+ // V count=1 inflight=0
+
+ NS = unix_peer(S)
+ skb2 = sock_alloc()
+ skb_queue_tail(NS, skb2[V])
+
+ // V became in-flight
+ // V count=2 inflight=1
+
+ close(V)
+
+ // V count=1 inflight=1
+ // GC candidate condition met
+
+ for u in gc_inflight_list:
+ if (total_refs == inflight_refs)
+ add u to gc_candidates
+
+ // gc_candidates={L, V}
+
+ for u in gc_candidates:
+ scan_children(u, dec_inflight)
+
+ // embryo (skb1) was not
+ // reachable from L yet, so V's
+ // inflight remains unchanged
+__skb_queue_tail(L, skb1)
+unix_state_unlock(L)
+ for u in gc_candidates:
+ if (u.inflight)
+ scan_children(u, inc_inflight_move_tail)
+
+ // V count=1 inflight=2 (!)
+
+If there is a GC-candidate listening socket, lock/unlock its state. This
+makes GC wait until the end of any ongoing connect() to that socket. After
+flipping the lock, a possibly SCM-laden embryo is already enqueued. And if
+there is another embryo coming, it can not possibly carry SCM_RIGHTS. At
+this point, unix_inflight() can not happen because unix_gc_lock is already
+taken. Inflight graph remains unaffected.
+
+Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/garbage.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 04dd9e80cbe01..a3a49110fe06f 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -239,11 +239,22 @@ void unix_gc(void)
+ * receive queues. Other, non candidate sockets _can_ be
+ * added to queue, so we must make sure only to touch
+ * candidates.
++ *
++ * Embryos, though never candidates themselves, affect which
++ * candidates are reachable by the garbage collector. Before
++ * being added to a listener's queue, an embryo may already
++ * receive data carrying SCM_RIGHTS, potentially making the
++ * passed socket a candidate that is not yet reachable by the
++ * collector. It becomes reachable once the embryo is
++ * enqueued. Therefore, we must ensure that no SCM-laden
++ * embryo appears in a (candidate) listener's queue between
++ * consecutive scan_children() calls.
+ */
+ list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
++ struct sock *sk = &u->sk;
+ long total_refs;
+
+- total_refs = file_count(u->sk.sk_socket->file);
++ total_refs = file_count(sk->sk_socket->file);
+
+ BUG_ON(!u->inflight);
+ BUG_ON(total_refs < u->inflight);
+@@ -251,6 +262,11 @@ void unix_gc(void)
+ list_move_tail(&u->link, &gc_candidates);
+ __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+ __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
++
++ if (sk->sk_state == TCP_LISTEN) {
++ unix_state_lock(sk);
++ unix_state_unlock(sk);
++ }
+ }
+ }
+
+--
+2.43.0
+