From: Sasha Levin Date: Mon, 17 Jun 2024 11:30:31 +0000 (-0400) Subject: Fixes for 6.6 X-Git-Tag: v6.1.95~112 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=881507e1dd179119715e550436f7c6e4a003f6a1;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/af_unix-annotate-data-race-of-sk-sk_state-in-unix_ac.patch b/queue-6.6/af_unix-annotate-data-race-of-sk-sk_state-in-unix_ac.patch new file mode 100644 index 00000000000..93a4dbda12c --- /dev/null +++ b/queue-6.6/af_unix-annotate-data-race-of-sk-sk_state-in-unix_ac.patch @@ -0,0 +1,40 @@ +From 5faedee2d8eb25d77d1905f64f88003a96d57acb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Jun 2024 09:52:32 -0700 +Subject: af_unix: Annotate data-race of sk->sk_state in unix_accept(). + +From: Kuniyuki Iwashima + +[ Upstream commit 1b536948e805aab61a48c5aa5db10c9afee880bd ] + +Once sk->sk_state is changed to TCP_LISTEN, it never changes. + +unix_accept() takes the advantage and reads sk->sk_state without +holding unix_state_lock(). + +Let's use READ_ONCE() there. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index ab57fa0595e21..5fb705ce15ea5 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1711,7 +1711,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, + goto out; + + arg->err = -EINVAL; +- if (sk->sk_state != TCP_LISTEN) ++ if (READ_ONCE(sk->sk_state) != TCP_LISTEN) + goto out; + + /* If socket state is TCP_LISTEN it cannot change (for now...), +-- +2.43.0 + diff --git a/queue-6.6/af_unix-read-with-msg_peek-loops-if-the-first-unread.patch b/queue-6.6/af_unix-read-with-msg_peek-loops-if-the-first-unread.patch new file mode 100644 index 00000000000..93fabc30133 --- /dev/null +++ b/queue-6.6/af_unix-read-with-msg_peek-loops-if-the-first-unread.patch @@ -0,0 +1,85 @@ +From 0287273ab0d26c26b43ded5238b074546416829a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jun 2024 01:46:39 -0700 +Subject: af_unix: Read with MSG_PEEK loops if the first unread byte is OOB + +From: Rao Shoaib + +[ Upstream commit a6736a0addd60fccc3a3508461d72314cc609772 ] + +Read with MSG_PEEK flag loops if the first byte to read is an OOB byte. +commit 22dd70eb2c3d ("af_unix: Don't peek OOB data without MSG_OOB.") +addresses the loop issue but does not address the issue that no data +beyond OOB byte can be read. 
+ +>>> from socket import * +>>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) +>>> c1.send(b'a', MSG_OOB) +1 +>>> c1.send(b'b') +1 +>>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) +b'b' + +>>> from socket import * +>>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) +>>> c2.setsockopt(SOL_SOCKET, SO_OOBINLINE, 1) +>>> c1.send(b'a', MSG_OOB) +1 +>>> c1.send(b'b') +1 +>>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) +b'a' +>>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) +b'a' +>>> c2.recv(1, MSG_DONTWAIT) +b'a' +>>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) +b'b' +>>> + +Fixes: 314001f0bf92 ("af_unix: Add OOB support") +Signed-off-by: Rao Shoaib +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20240611084639.2248934-1-Rao.Shoaib@oracle.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 5fb705ce15ea5..5ec188209bdd5 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2600,18 +2600,18 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; +- } else if (sock_flag(sk, SOCK_URGINLINE)) { +- if (!(flags & MSG_PEEK)) { ++ } else if (!(flags & MSG_PEEK)) { ++ if (sock_flag(sk, SOCK_URGINLINE)) { + WRITE_ONCE(u->oob_skb, NULL); + consume_skb(skb); ++ } else { ++ __skb_unlink(skb, &sk->sk_receive_queue); ++ WRITE_ONCE(u->oob_skb, NULL); ++ unlinked_skb = skb; ++ skb = skb_peek(&sk->sk_receive_queue); + } +- } else if (flags & MSG_PEEK) { +- skb = NULL; +- } else { +- __skb_unlink(skb, &sk->sk_receive_queue); +- WRITE_ONCE(u->oob_skb, NULL); +- unlinked_skb = skb; +- skb = skb_peek(&sk->sk_receive_queue); ++ } else if (!sock_flag(sk, SOCK_URGINLINE)) { ++ skb = skb_peek_next(skb, &sk->sk_receive_queue); + } + } + +-- +2.43.0 + diff --git a/queue-6.6/af_unix-replace-bug_on-with-warn_on_once.patch 
b/queue-6.6/af_unix-replace-bug_on-with-warn_on_once.patch new file mode 100644 index 00000000000..4f4325715b0 --- /dev/null +++ b/queue-6.6/af_unix-replace-bug_on-with-warn_on_once.patch @@ -0,0 +1,87 @@ +From 17c0bc83b42a1e9feef661539a4ad1e1c09dd936 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Jan 2024 11:04:33 -0800 +Subject: af_unix: Replace BUG_ON() with WARN_ON_ONCE(). + +From: Kuniyuki Iwashima + +[ Upstream commit d0f6dc26346863e1f4a23117f5468614e54df064 ] + +This is a prep patch for the last patch in this series so that +checkpatch will not warn about BUG_ON(). + +Signed-off-by: Kuniyuki Iwashima +Acked-by: Jens Axboe +Link: https://lore.kernel.org/r/20240129190435.57228-2-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + net/unix/garbage.c | 8 ++++---- + net/unix/scm.c | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 96cc6b7674333..b4bf7f7538826 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -145,7 +145,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), + /* An embryo cannot be in-flight, so it's safe + * to use the list link. 
+ */ +- BUG_ON(!list_empty(&u->link)); ++ WARN_ON_ONCE(!list_empty(&u->link)); + list_add_tail(&u->link, &embryos); + } + spin_unlock(&x->sk_receive_queue.lock); +@@ -224,8 +224,8 @@ static void __unix_gc(struct work_struct *work) + + total_refs = file_count(sk->sk_socket->file); + +- BUG_ON(!u->inflight); +- BUG_ON(total_refs < u->inflight); ++ WARN_ON_ONCE(!u->inflight); ++ WARN_ON_ONCE(total_refs < u->inflight); + if (total_refs == u->inflight) { + list_move_tail(&u->link, &gc_candidates); + __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); +@@ -318,7 +318,7 @@ static void __unix_gc(struct work_struct *work) + list_move_tail(&u->link, &gc_inflight_list); + + /* All candidates should have been detached by now. */ +- BUG_ON(!list_empty(&gc_candidates)); ++ WARN_ON_ONCE(!list_empty(&gc_candidates)); + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); +diff --git a/net/unix/scm.c b/net/unix/scm.c +index b5ae5ab167773..505e56cf02a21 100644 +--- a/net/unix/scm.c ++++ b/net/unix/scm.c +@@ -51,10 +51,10 @@ void unix_inflight(struct user_struct *user, struct file *fp) + + if (u) { + if (!u->inflight) { +- BUG_ON(!list_empty(&u->link)); ++ WARN_ON_ONCE(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); + } else { +- BUG_ON(list_empty(&u->link)); ++ WARN_ON_ONCE(list_empty(&u->link)); + } + u->inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ +@@ -71,8 +71,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) + spin_lock(&unix_gc_lock); + + if (u) { +- BUG_ON(!u->inflight); +- BUG_ON(list_empty(&u->link)); ++ WARN_ON_ONCE(!u->inflight); ++ WARN_ON_ONCE(list_empty(&u->link)); + + u->inflight--; + if (!u->inflight) +-- +2.43.0 + diff --git a/queue-6.6/af_unix-return-struct-unix_sock-from-unix_get_socket.patch b/queue-6.6/af_unix-return-struct-unix_sock-from-unix_get_socket.patch new file mode 100644 index 00000000000..0c95e262a26 --- /dev/null +++ 
b/queue-6.6/af_unix-return-struct-unix_sock-from-unix_get_socket.patch @@ -0,0 +1,135 @@ +From 267ecb9a5c2758115014adbfcc140dfd1dac18e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 09:08:54 -0800 +Subject: af_unix: Return struct unix_sock from unix_get_socket(). + +From: Kuniyuki Iwashima + +[ Upstream commit 5b17307bd0789edea0675d524a2b277b93bbde62 ] + +Currently, unix_get_socket() returns struct sock, but after calling +it, we always cast it to unix_sk(). + +Let's return struct unix_sock from unix_get_socket(). + +Signed-off-by: Kuniyuki Iwashima +Acked-by: Pavel Begunkov +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20240123170856.41348-4-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + include/net/af_unix.h | 2 +- + net/unix/garbage.c | 19 +++++++------------ + net/unix/scm.c | 19 +++++++------------ + 3 files changed, 15 insertions(+), 25 deletions(-) + +diff --git a/include/net/af_unix.h b/include/net/af_unix.h +index 77bf30203d3cf..7a00d7ed527b6 100644 +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -14,7 +14,7 @@ void unix_destruct_scm(struct sk_buff *skb); + void io_uring_destruct_scm(struct sk_buff *skb); + void unix_gc(void); + void wait_for_unix_gc(void); +-struct sock *unix_get_socket(struct file *filp); ++struct unix_sock *unix_get_socket(struct file *filp); + struct sock *unix_peer_get(struct sock *sk); + + #define UNIX_HASH_MOD (256 - 1) +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 2a758531e1027..38639766b9e7c 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -105,20 +105,15 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), + + while (nfd--) { + /* Get the socket the fd matches if it indeed does so */ +- struct sock *sk = unix_get_socket(*fp++); ++ struct unix_sock *u = unix_get_socket(*fp++); + +- if (sk) { +- 
struct unix_sock *u = unix_sk(sk); ++ /* Ignore non-candidates, they could have been added ++ * to the queues after starting the garbage collection ++ */ ++ if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { ++ hit = true; + +- /* Ignore non-candidates, they could +- * have been added to the queues after +- * starting the garbage collection +- */ +- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { +- hit = true; +- +- func(u); +- } ++ func(u); + } + } + if (hit && hitlist != NULL) { +diff --git a/net/unix/scm.c b/net/unix/scm.c +index e92f2fad64105..b5ae5ab167773 100644 +--- a/net/unix/scm.c ++++ b/net/unix/scm.c +@@ -21,9 +21,8 @@ EXPORT_SYMBOL(gc_inflight_list); + DEFINE_SPINLOCK(unix_gc_lock); + EXPORT_SYMBOL(unix_gc_lock); + +-struct sock *unix_get_socket(struct file *filp) ++struct unix_sock *unix_get_socket(struct file *filp) + { +- struct sock *u_sock = NULL; + struct inode *inode = file_inode(filp); + + /* Socket ? */ +@@ -34,10 +33,10 @@ struct sock *unix_get_socket(struct file *filp) + + /* PF_UNIX ? 
*/ + if (s && ops && ops->family == PF_UNIX) +- u_sock = s; ++ return unix_sk(s); + } + +- return u_sock; ++ return NULL; + } + EXPORT_SYMBOL(unix_get_socket); + +@@ -46,13 +45,11 @@ EXPORT_SYMBOL(unix_get_socket); + */ + void unix_inflight(struct user_struct *user, struct file *fp) + { +- struct sock *s = unix_get_socket(fp); ++ struct unix_sock *u = unix_get_socket(fp); + + spin_lock(&unix_gc_lock); + +- if (s) { +- struct unix_sock *u = unix_sk(s); +- ++ if (u) { + if (!u->inflight) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); +@@ -69,13 +66,11 @@ void unix_inflight(struct user_struct *user, struct file *fp) + + void unix_notinflight(struct user_struct *user, struct file *fp) + { +- struct sock *s = unix_get_socket(fp); ++ struct unix_sock *u = unix_get_socket(fp); + + spin_lock(&unix_gc_lock); + +- if (s) { +- struct unix_sock *u = unix_sk(s); +- ++ if (u) { + BUG_ON(!u->inflight); + BUG_ON(list_empty(&u->link)); + +-- +2.43.0 + diff --git a/queue-6.6/af_unix-run-gc-on-only-one-cpu.patch b/queue-6.6/af_unix-run-gc-on-only-one-cpu.patch new file mode 100644 index 00000000000..b9fc302c80b --- /dev/null +++ b/queue-6.6/af_unix-run-gc-on-only-one-cpu.patch @@ -0,0 +1,133 @@ +From 98ec559b1b6f1e877ae9fa39c646ecfcdc6d6342 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 09:08:55 -0800 +Subject: af_unix: Run GC on only one CPU. + +From: Kuniyuki Iwashima + +[ Upstream commit 8b90a9f819dc2a06baae4ec1a64d875e53b824ec ] + +If more than 16000 inflight AF_UNIX sockets exist and the garbage +collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). +Also, they wait for unix_gc() to complete. + +In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, +and more if they are the GC candidate. Thus, sendmsg() significantly +slows down with too many inflight AF_UNIX sockets. 
+ +There is a small window to invoke multiple unix_gc() instances, which +will then be blocked by the same spinlock except for one. + +Let's convert unix_gc() to use struct work so that it will not consume +CPUs unnecessarily. + +Note WRITE_ONCE(gc_in_progress, true) is moved before running GC. +If we leave the WRITE_ONCE() as is and use the following test to +call flush_work(), a process might not call it. + + CPU 0 CPU 1 + --- --- + start work and call __unix_gc() + if (work_pending(&unix_gc_work) || <-- false + READ_ONCE(gc_in_progress)) <-- false + flush_work(); <-- missed! + WRITE_ONCE(gc_in_progress, true) + +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20240123170856.41348-5-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + net/unix/garbage.c | 54 +++++++++++++++++++++++----------------------- + 1 file changed, 27 insertions(+), 27 deletions(-) + +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 38639766b9e7c..a2a8543613a52 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -86,7 +86,6 @@ + /* Internal data structures and random procedures: */ + + static LIST_HEAD(gc_candidates); +-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); + + static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), + struct sk_buff_head *hitlist) +@@ -182,23 +181,8 @@ static void inc_inflight_move_tail(struct unix_sock *u) + } + + static bool gc_in_progress; +-#define UNIX_INFLIGHT_TRIGGER_GC 16000 +- +-void wait_for_unix_gc(void) +-{ +- /* If number of inflight sockets is insane, +- * force a garbage collect right now. +- * Paired with the WRITE_ONCE() in unix_inflight(), +- * unix_notinflight() and gc_in_progress(). 
+- */ +- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && +- !READ_ONCE(gc_in_progress)) +- unix_gc(); +- wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); +-} + +-/* The external entry point: unix_gc() */ +-void unix_gc(void) ++static void __unix_gc(struct work_struct *work) + { + struct sk_buff *next_skb, *skb; + struct unix_sock *u; +@@ -209,13 +193,6 @@ void unix_gc(void) + + spin_lock(&unix_gc_lock); + +- /* Avoid a recursive GC. */ +- if (gc_in_progress) +- goto out; +- +- /* Paired with READ_ONCE() in wait_for_unix_gc(). */ +- WRITE_ONCE(gc_in_progress, true); +- + /* First, select candidates for garbage collection. Only + * in-flight sockets are considered, and from those only ones + * which don't have any external reference. +@@ -346,8 +323,31 @@ void unix_gc(void) + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + +- wake_up(&unix_gc_wait); +- +- out: + spin_unlock(&unix_gc_lock); + } ++ ++static DECLARE_WORK(unix_gc_work, __unix_gc); ++ ++void unix_gc(void) ++{ ++ WRITE_ONCE(gc_in_progress, true); ++ queue_work(system_unbound_wq, &unix_gc_work); ++} ++ ++#define UNIX_INFLIGHT_TRIGGER_GC 16000 ++ ++void wait_for_unix_gc(void) ++{ ++ /* If number of inflight sockets is insane, ++ * force a garbage collect right now. ++ * ++ * Paired with the WRITE_ONCE() in unix_inflight(), ++ * unix_notinflight(), and __unix_gc(). 
++ */ ++ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && ++ !READ_ONCE(gc_in_progress)) ++ unix_gc(); ++ ++ if (READ_ONCE(gc_in_progress)) ++ flush_work(&unix_gc_work); ++} +-- +2.43.0 + diff --git a/queue-6.6/af_unix-save-listener-for-embryo-socket.patch b/queue-6.6/af_unix-save-listener-for-embryo-socket.patch new file mode 100644 index 00000000000..894da4fbf06 --- /dev/null +++ b/queue-6.6/af_unix-save-listener-for-embryo-socket.patch @@ -0,0 +1,83 @@ +From 85705757f4b5b569af40f4a66f9a4308ea950651 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Mar 2024 13:24:17 -0700 +Subject: af_unix: Save listener for embryo socket. + +From: Kuniyuki Iwashima + +[ Upstream commit aed6ecef55d70de3762ce41c561b7f547dbaf107 ] + +This is a prep patch for the following change, where we need to +fetch the listening socket from the successor embryo socket +during GC. + +We add a new field to struct unix_sock to save a pointer to a +listening socket. + +We set it when connect() creates a new socket, and clear it when +accept() is called. 
+ +Signed-off-by: Kuniyuki Iwashima +Acked-by: Paolo Abeni +Link: https://lore.kernel.org/r/20240325202425.60930-8-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + include/net/af_unix.h | 1 + + net/unix/af_unix.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/net/af_unix.h b/include/net/af_unix.h +index 865e2f7bd67cf..1aa30778d2c05 100644 +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -62,6 +62,7 @@ struct unix_sock { + struct path path; + struct mutex iolock, bindlock; + struct sock *peer; ++ struct sock *listener; + struct list_head link; + unsigned long inflight; + spinlock_t lock; +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 868f5332566c7..9d48eef5d62e3 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -987,6 +987,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, + sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); + sk->sk_destruct = unix_sock_destructor; + u = unix_sk(sk); ++ u->listener = NULL; + u->inflight = 0; + u->path.dentry = NULL; + u->path.mnt = NULL; +@@ -1606,6 +1607,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + newsk->sk_type = sk->sk_type; + init_peercred(newsk); + newu = unix_sk(newsk); ++ newu->listener = other; + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); + otheru = unix_sk(other); + +@@ -1701,8 +1703,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) + { + struct sock *sk = sock->sk; +- struct sock *tsk; + struct sk_buff *skb; ++ struct sock *tsk; + int err; + + err = -EOPNOTSUPP; +@@ -1727,6 +1729,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + } + + tsk = skb->sk; ++ unix_sk(tsk)->listener = NULL; + skb_free_datagram(sk, skb); + 
wake_up_interruptible(&unix_sk(sk)->peer_wait); + +-- +2.43.0 + diff --git a/queue-6.6/af_unix-try-to-run-gc-async.patch b/queue-6.6/af_unix-try-to-run-gc-async.patch new file mode 100644 index 00000000000..0d4609d189b --- /dev/null +++ b/queue-6.6/af_unix-try-to-run-gc-async.patch @@ -0,0 +1,210 @@ +From 6f82fb6e3831ba0424dd146e087553b9bfbe294b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Jan 2024 09:08:56 -0800 +Subject: af_unix: Try to run GC async. + +From: Kuniyuki Iwashima + +[ Upstream commit d9f21b3613337b55cc9d4a6ead484dca68475143 ] + +If more than 16000 inflight AF_UNIX sockets exist and the garbage +collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). +Also, they wait for unix_gc() to complete. + +In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, +and more if they are the GC candidate. Thus, sendmsg() significantly +slows down with too many inflight AF_UNIX sockets. + +However, if a process sends data with no AF_UNIX FD, the sendmsg() call +does not need to wait for GC. After this change, only the process that +meets the condition below will be blocked under such a situation. + + 1) cmsg contains AF_UNIX socket + 2) more than 32 AF_UNIX sent by the same user are still inflight + +Note that even a sendmsg() call that does not meet the condition but has +AF_UNIX FD will be blocked later in unix_scm_to_skb() by the spinlock, +but we allow that as a bonus for sane users. + +The results below are the time spent in unix_dgram_sendmsg() sending 1 +byte of data with no FD 4096 times on a host where 32K inflight AF_UNIX +sockets exist. + +Without series: the sane sendmsg() needs to wait gc unreasonably. + + $ sudo /usr/share/bcc/tools/funclatency -p 11165 unix_dgram_sendmsg + Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. + ^C + nsecs : count distribution + [...] 
+ 524288 -> 1048575 : 0 | | + 1048576 -> 2097151 : 3881 |****************************************| + 2097152 -> 4194303 : 214 |** | + 4194304 -> 8388607 : 1 | | + + avg = 1825567 nsecs, total: 7477526027 nsecs, count: 4096 + +With series: the sane sendmsg() can finish much faster. + + $ sudo /usr/share/bcc/tools/funclatency -p 8702 unix_dgram_sendmsg + Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. + ^C + nsecs : count distribution + [...] + 128 -> 255 : 0 | | + 256 -> 511 : 4092 |****************************************| + 512 -> 1023 : 2 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 0 | | + 4096 -> 8191 : 1 | | + 8192 -> 16383 : 1 | | + + avg = 410 nsecs, total: 1680510 nsecs, count: 4096 + +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20240123170856.41348-6-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + include/net/af_unix.h | 12 ++++++++++-- + include/net/scm.h | 1 + + net/core/scm.c | 5 +++++ + net/unix/af_unix.c | 6 ++++-- + net/unix/garbage.c | 10 +++++++++- + 5 files changed, 29 insertions(+), 5 deletions(-) + +diff --git a/include/net/af_unix.h b/include/net/af_unix.h +index 7a00d7ed527b6..865e2f7bd67cf 100644 +--- a/include/net/af_unix.h ++++ b/include/net/af_unix.h +@@ -8,13 +8,21 @@ + #include + #include + ++#if IS_ENABLED(CONFIG_UNIX) ++struct unix_sock *unix_get_socket(struct file *filp); ++#else ++static inline struct unix_sock *unix_get_socket(struct file *filp) ++{ ++ return NULL; ++} ++#endif ++ + void unix_inflight(struct user_struct *user, struct file *fp); + void unix_notinflight(struct user_struct *user, struct file *fp); + void unix_destruct_scm(struct sk_buff *skb); + void io_uring_destruct_scm(struct sk_buff *skb); + void unix_gc(void); +-void wait_for_unix_gc(void); +-struct unix_sock *unix_get_socket(struct file *filp); ++void wait_for_unix_gc(struct scm_fp_list *fpl); + 
struct sock *unix_peer_get(struct sock *sk); + + #define UNIX_HASH_MOD (256 - 1) +diff --git a/include/net/scm.h b/include/net/scm.h +index e8c76b4be2fe7..1ff6a28550644 100644 +--- a/include/net/scm.h ++++ b/include/net/scm.h +@@ -24,6 +24,7 @@ struct scm_creds { + + struct scm_fp_list { + short count; ++ short count_unix; + short max; + struct user_struct *user; + struct file *fp[SCM_MAX_FD]; +diff --git a/net/core/scm.c b/net/core/scm.c +index 737917c7ac627..574607b1c2d96 100644 +--- a/net/core/scm.c ++++ b/net/core/scm.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + + /* +@@ -85,6 +86,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) + return -ENOMEM; + *fplp = fpl; + fpl->count = 0; ++ fpl->count_unix = 0; + fpl->max = SCM_MAX_FD; + fpl->user = NULL; + } +@@ -109,6 +111,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) + fput(file); + return -EINVAL; + } ++ if (unix_get_socket(file)) ++ fpl->count_unix++; ++ + *fpp++ = file; + fpl->count++; + } +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index e6395647558af..868f5332566c7 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1908,11 +1908,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + long timeo; + int err; + +- wait_for_unix_gc(); + err = scm_send(sock, msg, &scm, false); + if (err < 0) + return err; + ++ wait_for_unix_gc(scm.fp); ++ + err = -EOPNOTSUPP; + if (msg->msg_flags&MSG_OOB) + goto out; +@@ -2180,11 +2181,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + bool fds_sent = false; + int data_len; + +- wait_for_unix_gc(); + err = scm_send(sock, msg, &scm, false); + if (err < 0) + return err; + ++ wait_for_unix_gc(scm.fp); ++ + err = -EOPNOTSUPP; + if (msg->msg_flags & MSG_OOB) { + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index a2a8543613a52..96cc6b7674333 100644 +--- a/net/unix/garbage.c ++++ 
b/net/unix/garbage.c +@@ -335,8 +335,9 @@ void unix_gc(void) + } + + #define UNIX_INFLIGHT_TRIGGER_GC 16000 ++#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8) + +-void wait_for_unix_gc(void) ++void wait_for_unix_gc(struct scm_fp_list *fpl) + { + /* If number of inflight sockets is insane, + * force a garbage collect right now. +@@ -348,6 +349,13 @@ void wait_for_unix_gc(void) + !READ_ONCE(gc_in_progress)) + unix_gc(); + ++ /* Penalise users who want to send AF_UNIX sockets ++ * but whose sockets have not been received yet. ++ */ ++ if (!fpl || !fpl->count_unix || ++ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) ++ return; ++ + if (READ_ONCE(gc_in_progress)) + flush_work(&unix_gc_work); + } +-- +2.43.0 + diff --git a/queue-6.6/block-fix-request.queuelist-usage-in-flush.patch b/queue-6.6/block-fix-request.queuelist-usage-in-flush.patch new file mode 100644 index 00000000000..769cbc0edce --- /dev/null +++ b/queue-6.6/block-fix-request.queuelist-usage-in-flush.patch @@ -0,0 +1,75 @@ +From b5cb31590b0082cd1c72973dfb8f25180a04e04b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Jun 2024 22:31:15 +0800 +Subject: block: fix request.queuelist usage in flush + +From: Chengming Zhou + +[ Upstream commit d0321c812d89c5910d8da8e4b10c891c6b96ff70 ] + +Friedrich Weber reported a kernel crash problem and bisected to commit +81ada09cc25e ("blk-flush: reuse rq queuelist in flush state machine"). + +The root cause is that we use "list_move_tail(&rq->queuelist, pending)" +in the PREFLUSH/POSTFLUSH sequences. But rq->queuelist.next == xxx since +it's popped out from plug->cached_rq in __blk_mq_alloc_requests_batch(). +We don't initialize its queuelist just for this first request, although +the queuelist of all later popped requests will be initialized. + +Fix it by changing to use "list_add_tail(&rq->queuelist, pending)" so +rq->queuelist doesn't need to be initialized. 
It should be ok since rq +can't be on any list when PREFLUSH or POSTFLUSH, has no move actually. + +Please note the commit 81ada09cc25e ("blk-flush: reuse rq queuelist in +flush state machine") also has another requirement that no drivers would +touch rq->queuelist after blk_mq_end_request() since we will reuse it to +add rq to the post-flush pending list in POSTFLUSH. If this is not true, +we will have to revert that commit IMHO. + +This updated version adds "list_del_init(&rq->queuelist)" in flush rq +callback since the dm layer may submit request of a weird invalid format +(REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH), which causes double list_add +if without this "list_del_init(&rq->queuelist)". The weird invalid format +problem should be fixed in dm layer. + +Reported-by: Friedrich Weber +Closes: https://lore.kernel.org/lkml/14b89dfb-505c-49f7-aebb-01c54451db40@proxmox.com/ +Closes: https://lore.kernel.org/lkml/c9d03ff7-27c5-4ebd-b3f6-5a90d96f35ba@proxmox.com/ +Fixes: 81ada09cc25e ("blk-flush: reuse rq queuelist in flush state machine") +Cc: Christoph Hellwig +Cc: ming.lei@redhat.com +Cc: bvanassche@acm.org +Tested-by: Friedrich Weber +Signed-off-by: Chengming Zhou +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20240608143115.972486-1-chengming.zhou@linux.dev +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-flush.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block/blk-flush.c b/block/blk-flush.c +index e73dc22d05c1d..313f0ffcce42e 100644 +--- a/block/blk-flush.c ++++ b/block/blk-flush.c +@@ -183,7 +183,7 @@ static void blk_flush_complete_seq(struct request *rq, + /* queue for flush */ + if (list_empty(pending)) + fq->flush_pending_since = jiffies; +- list_move_tail(&rq->queuelist, pending); ++ list_add_tail(&rq->queuelist, pending); + break; + + case REQ_FSEQ_DATA: +@@ -261,6 +261,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq, + unsigned int seq = blk_flush_cur_seq(rq); + + 
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); ++ list_del_init(&rq->queuelist); + blk_flush_complete_seq(rq, fq, seq, error); + } + +-- +2.43.0 + diff --git a/queue-6.6/block-sed-opal-avoid-possible-wrong-address-referenc.patch b/queue-6.6/block-sed-opal-avoid-possible-wrong-address-referenc.patch new file mode 100644 index 00000000000..328e5a9f890 --- /dev/null +++ b/queue-6.6/block-sed-opal-avoid-possible-wrong-address-referenc.patch @@ -0,0 +1,42 @@ +From 305b8c60411a3d319afb1f4b1c7dbd2fc2d85226 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jun 2024 15:37:00 +0800 +Subject: block: sed-opal: avoid possible wrong address reference in + read_sed_opal_key() + +From: Su Hui + +[ Upstream commit 9b1ebce6a1fded90d4a1c6c57dc6262dac4c4c14 ] + +Clang static checker (scan-build) warning: +block/sed-opal.c:line 317, column 3 +Value stored to 'ret' is never read. + +Fix this problem by returning the error code when keyring_search() failed. +Otherwise, 'key' will have a wrong value when 'kerf' stores the error code. 
+ +Fixes: 3bfeb6125664 ("block: sed-opal: keyring support for SED keys") +Signed-off-by: Su Hui +Link: https://lore.kernel.org/r/20240611073659.429582-1-suhui@nfschina.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/sed-opal.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/sed-opal.c b/block/sed-opal.c +index e27109be77690..1a1cb35bf4b79 100644 +--- a/block/sed-opal.c ++++ b/block/sed-opal.c +@@ -313,7 +313,7 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) + &key_type_user, key_name, true); + + if (IS_ERR(kref)) +- ret = PTR_ERR(kref); ++ return PTR_ERR(kref); + + key = key_ref_to_ptr(kref); + down_read(&key->sem); +-- +2.43.0 + diff --git a/queue-6.6/bluetooth-fix-connection-setup-in-l2cap_connect.patch b/queue-6.6/bluetooth-fix-connection-setup-in-l2cap_connect.patch new file mode 100644 index 00000000000..37b1d6ed501 --- /dev/null +++ b/queue-6.6/bluetooth-fix-connection-setup-in-l2cap_connect.patch @@ -0,0 +1,45 @@ +From 6e073e1801b8427cd78293e94b77752ad519242f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Jun 2024 18:06:20 +0300 +Subject: Bluetooth: fix connection setup in l2cap_connect + +From: Pauli Virtanen + +[ Upstream commit c695439d198d30e10553a3b98360c5efe77b6903 ] + +The amp_id argument of l2cap_connect() was removed in +commit 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") + +It was always called with amp_id == 0, i.e. AMP_ID_BREDR == 0x00 (ie. +non-AMP controller). In the above commit, the code path for amp_id != 0 +was preserved, although it should have used the amp_id == 0 one. + +Restore the previous behavior of the non-AMP code path, to fix problems +with L2CAP connections. 
+ +Fixes: 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") +Signed-off-by: Pauli Virtanen +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/l2cap_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index d5fb78c604cf3..bf31c5bae218f 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -4009,8 +4009,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, + status = L2CAP_CS_AUTHOR_PEND; + chan->ops->defer(chan); + } else { +- l2cap_state_change(chan, BT_CONNECT2); +- result = L2CAP_CR_PEND; ++ l2cap_state_change(chan, BT_CONFIG); ++ result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } + } else { +-- +2.43.0 + diff --git a/queue-6.6/bluetooth-l2cap-fix-rejecting-l2cap_conn_param_updat.patch b/queue-6.6/bluetooth-l2cap-fix-rejecting-l2cap_conn_param_updat.patch new file mode 100644 index 00000000000..626090b1226 --- /dev/null +++ b/queue-6.6/bluetooth-l2cap-fix-rejecting-l2cap_conn_param_updat.patch @@ -0,0 +1,108 @@ +From e0b5d63f1e1619a0ff1f30b6127a949b7a2e2cbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 May 2024 16:03:07 -0400 +Subject: Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ + +From: Luiz Augusto von Dentz + +[ Upstream commit 806a5198c05987b748b50f3d0c0cfb3d417381a4 ] + +This removes the bogus check for max > hcon->le_conn_max_interval since +the latter is just the initial maximum conn interval not the maximum the +stack could support which is really 3200=4000ms. 
+ +In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values +of the following fields in IXIT that would cause hci_check_conn_params +to fail: + +TSPX_conn_update_int_min +TSPX_conn_update_int_max +TSPX_conn_update_peripheral_latency +TSPX_conn_update_supervision_timeout + +Link: https://github.com/bluez/bluez/issues/847 +Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++---- + net/bluetooth/l2cap_core.c | 8 +------ + 2 files changed, 33 insertions(+), 11 deletions(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index f786d2d62fa5e..f89d6d43ba8f1 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -2071,18 +2071,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, + { + u16 max_latency; + +- if (min > max || min < 6 || max > 3200) ++ if (min > max) { ++ BT_WARN("min %d > max %d", min, max); + return -EINVAL; ++ } ++ ++ if (min < 6) { ++ BT_WARN("min %d < 6", min); ++ return -EINVAL; ++ } ++ ++ if (max > 3200) { ++ BT_WARN("max %d > 3200", max); ++ return -EINVAL; ++ } ++ ++ if (to_multiplier < 10) { ++ BT_WARN("to_multiplier %d < 10", to_multiplier); ++ return -EINVAL; ++ } + +- if (to_multiplier < 10 || to_multiplier > 3200) ++ if (to_multiplier > 3200) { ++ BT_WARN("to_multiplier %d > 3200", to_multiplier); + return -EINVAL; ++ } + +- if (max >= to_multiplier * 8) ++ if (max >= to_multiplier * 8) { ++ BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); + return -EINVAL; ++ } + + max_latency = (to_multiplier * 4 / max) - 1; +- if (latency > 499 || latency > max_latency) ++ if (latency > 499) { ++ BT_WARN("latency %d > 499", latency); + return -EINVAL; ++ } ++ ++ if (latency > max_latency) { ++ BT_WARN("latency %d > max_latency %d", latency, max_latency); ++ 
return -EINVAL; ++ } + + return 0; + } +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 37210567fbfbe..d5fb78c604cf3 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -4645,13 +4645,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + + memset(&rsp, 0, sizeof(rsp)); + +- if (max > hcon->le_conn_max_interval) { +- BT_DBG("requested connection interval exceeds current bounds."); +- err = -EINVAL; +- } else { +- err = hci_check_conn_params(min, max, latency, to_multiplier); +- } +- ++ err = hci_check_conn_params(min, max, latency, to_multiplier); + if (err) + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else +-- +2.43.0 + diff --git a/queue-6.6/bnxt_en-adjust-logging-of-firmware-messages-in-case-.patch b/queue-6.6/bnxt_en-adjust-logging-of-firmware-messages-in-case-.patch new file mode 100644 index 00000000000..ccc7073e8b6 --- /dev/null +++ b/queue-6.6/bnxt_en-adjust-logging-of-firmware-messages-in-case-.patch @@ -0,0 +1,47 @@ +From 881ca4ccc8970220dd467b6500f267e88f10584c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jun 2024 11:25:46 +0300 +Subject: bnxt_en: Adjust logging of firmware messages in case of released + token in __hwrm_send() + +From: Aleksandr Mishin + +[ Upstream commit a9b9741854a9fe9df948af49ca5514e0ed0429df ] + +In case of token is released due to token->state == BNXT_HWRM_DEFERRED, +released token (set to NULL) is used in log messages. This issue is +expected to be prevented by HWRM_ERR_CODE_PF_UNAVAILABLE error code. But +this error code is returned by recent firmware. So some firmware may not +return it. This may lead to NULL pointer dereference. +Adjust this issue by adding token pointer check. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Fixes: 8fa4219dba8e ("bnxt_en: add dynamic debug support for HWRM messages") +Suggested-by: Michael Chan +Signed-off-by: Aleksandr Mishin +Reviewed-by: Wojciech Drewek +Reviewed-by: Michael Chan +Link: https://lore.kernel.org/r/20240611082547.12178-1-amishin@t-argos.ru +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +index 132442f16fe67..7a4e08b5a8c1b 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +@@ -678,7 +678,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) + req_type); + else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) + hwrm_err(bp, ctx, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n", +- req_type, token->seq_id, rc); ++ req_type, le16_to_cpu(ctx->req->seq_id), rc); + rc = __hwrm_to_stderr(rc); + exit: + if (token) +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-add-output-string-to-cachefiles_obj_-get-.patch b/queue-6.6/cachefiles-add-output-string-to-cachefiles_obj_-get-.patch new file mode 100644 index 00000000000..7ca78d284b4 --- /dev/null +++ b/queue-6.6/cachefiles-add-output-string-to-cachefiles_obj_-get-.patch @@ -0,0 +1,40 @@ +From 6401cdb9844aec4d4e7673857e6e08352b73221a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:42:57 +0800 +Subject: cachefiles: add output string to cachefiles_obj_[get|put]_ondemand_fd + +From: Baokun Li + +[ Upstream commit cc5ac966f26193ab185cc43d64d9f1ae998ccb6e ] + +This lets us see the correct trace output. 
+ +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-2-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Reviewed-by: Jingbo Xu +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + include/trace/events/cachefiles.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h +index cf4b98b9a9edc..e3213af847cdf 100644 +--- a/include/trace/events/cachefiles.h ++++ b/include/trace/events/cachefiles.h +@@ -127,7 +127,9 @@ enum cachefiles_error_trace { + EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ + EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ + EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ +- E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") ++ EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ ++ EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ ++ E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") + + #define cachefiles_coherency_traces \ + EM(cachefiles_coherency_check_aux, "BAD aux ") \ +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-add-restore-command-to-recover-inflight-o.patch b/queue-6.6/cachefiles-add-restore-command-to-recover-inflight-o.patch new file mode 100644 index 00000000000..fced623f0f5 --- /dev/null +++ b/queue-6.6/cachefiles-add-restore-command-to-recover-inflight-o.patch @@ -0,0 +1,94 @@ +From dd1c13ebd6c3601dac3cb0097a9ce66cf71c3936 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Nov 2023 12:14:22 +0800 +Subject: cachefiles: add restore command to recover inflight ondemand read + requests + +From: Jia Zhu + +[ Upstream commit e73fa11a356ca0905c3cc648eaacc6f0f2d2c8b3 ] + +Previously, in ondemand read scenario, if the anonymous fd was closed by +user daemon, inflight and subsequent read requests would return EIO. 
+As long as the device connection is not released, user daemon can hold +and restore inflight requests by setting the request flag to +CACHEFILES_REQ_NEW. + +Suggested-by: Gao Xiang +Signed-off-by: Jia Zhu +Signed-off-by: Xin Yin +Link: https://lore.kernel.org/r/20231120041422.75170-6-zhujia.zj@bytedance.com +Reviewed-by: Jingbo Xu +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Stable-dep-of: 4b4391e77a6b ("cachefiles: defer exposing anon_fd until after copy_to_user() succeeds") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/daemon.c | 1 + + fs/cachefiles/internal.h | 3 +++ + fs/cachefiles/ondemand.c | 23 +++++++++++++++++++++++ + 3 files changed, 27 insertions(+) + +diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c +index 7d1f456e376dd..26b487e112596 100644 +--- a/fs/cachefiles/daemon.c ++++ b/fs/cachefiles/daemon.c +@@ -77,6 +77,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { + { "tag", cachefiles_daemon_tag }, + #ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, ++ { "restore", cachefiles_ondemand_restore }, + #endif + { "", NULL } + }; +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 33fe418aca770..361356d0e866a 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -304,6 +304,9 @@ extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + ++extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache, ++ char *args); ++ + extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); + extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 8118649d30727..6d8f7f01a73ac 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -214,6 +214,29 @@ int cachefiles_ondemand_copen(struct 
cachefiles_cache *cache, char *args) + return ret; + } + ++int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) ++{ ++ struct cachefiles_req *req; ++ ++ XA_STATE(xas, &cache->reqs, 0); ++ ++ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) ++ return -EOPNOTSUPP; ++ ++ /* ++ * Reset the requests to CACHEFILES_REQ_NEW state, so that the ++ * requests have been processed halfway before the crash of the ++ * user daemon could be reprocessed after the recovery. ++ */ ++ xas_lock(&xas); ++ xas_for_each(&xas, req, ULONG_MAX) ++ xas_set_mark(&xas, CACHEFILES_REQ_NEW); ++ xas_unlock(&xas); ++ ++ wake_up_all(&cache->daemon_pollwq); ++ return 0; ++} ++ + static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + { + struct cachefiles_object *object; +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-add-spin_lock-for-cachefiles_ondemand_inf.patch b/queue-6.6/cachefiles-add-spin_lock-for-cachefiles_ondemand_inf.patch new file mode 100644 index 00000000000..13e4dd3105b --- /dev/null +++ b/queue-6.6/cachefiles-add-spin_lock-for-cachefiles_ondemand_inf.patch @@ -0,0 +1,138 @@ +From bfa214e4535e89751d3b7e6c94c8affb5061439f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:03 +0800 +Subject: cachefiles: add spin_lock for cachefiles_ondemand_info + +From: Baokun Li + +[ Upstream commit 0a790040838c736495d5afd6b2d636f159f817f1 ] + +The following concurrency may cause a read request to fail to be completed +and result in a hung: + + t1 | t2 +--------------------------------------------------------- + cachefiles_ondemand_copen + req = xa_erase(&cache->reqs, id) +// Anon fd is maliciously closed. +cachefiles_ondemand_fd_release + xa_lock(&cache->reqs) + cachefiles_ondemand_set_object_close(object) + xa_unlock(&cache->reqs) + cachefiles_ondemand_set_object_open + // No one will ever close it again. +cachefiles_ondemand_daemon_read + cachefiles_ondemand_select_req + // Get a read req but its fd is already closed. 
+ // The daemon can't issue a cread ioctl with an closed fd, then hung. + +So add spin_lock for cachefiles_ondemand_info to protect ondemand_id and +state, thus we can avoid the above problem in cachefiles_ondemand_copen() +by using ondemand_id to determine if fd has been closed. + +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-8-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/cachefiles/internal.h | 1 + + fs/cachefiles/ondemand.c | 35 ++++++++++++++++++++++++++++++++++- + 2 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index b9a90f1a0c015..33fe418aca770 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -55,6 +55,7 @@ struct cachefiles_ondemand_info { + int ondemand_id; + enum cachefiles_object_state state; + struct cachefiles_object *object; ++ spinlock_t lock; + }; + + /* +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 8e130de952f7d..8118649d30727 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -10,13 +10,16 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, + struct cachefiles_object *object = file->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_ondemand_info *info = object->ondemand; +- int object_id = info->ondemand_id; ++ int object_id; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + + xa_lock(&cache->reqs); ++ spin_lock(&info->lock); ++ object_id = info->ondemand_id; + info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); ++ spin_unlock(&info->lock); + + /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { +@@ -116,6 
+119,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + { + struct cachefiles_req *req; + struct fscache_cookie *cookie; ++ struct cachefiles_ondemand_info *info; + char *pid, *psize; + unsigned long id; + long size; +@@ -166,6 +170,33 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + goto out; + } + ++ info = req->object->ondemand; ++ spin_lock(&info->lock); ++ /* ++ * The anonymous fd was closed before copen ? Fail the request. ++ * ++ * t1 | t2 ++ * --------------------------------------------------------- ++ * cachefiles_ondemand_copen ++ * req = xa_erase(&cache->reqs, id) ++ * // Anon fd is maliciously closed. ++ * cachefiles_ondemand_fd_release ++ * xa_lock(&cache->reqs) ++ * cachefiles_ondemand_set_object_close(object) ++ * xa_unlock(&cache->reqs) ++ * cachefiles_ondemand_set_object_open ++ * // No one will ever close it again. ++ * cachefiles_ondemand_daemon_read ++ * cachefiles_ondemand_select_req ++ * ++ * Get a read req but its fd is already closed. The daemon can't ++ * issue a cread ioctl with an closed fd, then hung. 
++ */ ++ if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) { ++ spin_unlock(&info->lock); ++ req->error = -EBADFD; ++ goto out; ++ } + cookie = req->object->cookie; + cookie->object_size = size; + if (size) +@@ -175,6 +206,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + trace_cachefiles_ondemand_copen(req->object, id, size); + + cachefiles_ondemand_set_object_open(req->object); ++ spin_unlock(&info->lock); + wake_up_all(&cache->daemon_pollwq); + + out: +@@ -552,6 +584,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, + return -ENOMEM; + + object->ondemand->object = object; ++ spin_lock_init(&object->ondemand->lock); + INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-defer-exposing-anon_fd-until-after-copy_t.patch b/queue-6.6/cachefiles-defer-exposing-anon_fd-until-after-copy_t.patch new file mode 100644 index 00000000000..9218ad77682 --- /dev/null +++ b/queue-6.6/cachefiles-defer-exposing-anon_fd-until-after-copy_t.patch @@ -0,0 +1,164 @@ +From a7d757d9908626923c4cafac8a2b8c7fd0424d0c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:05 +0800 +Subject: cachefiles: defer exposing anon_fd until after copy_to_user() + succeeds + +From: Baokun Li + +[ Upstream commit 4b4391e77a6bf24cba2ef1590e113d9b73b11039 ] + +After installing the anonymous fd, we can now see it in userland and close +it. However, at this point we may not have gotten the reference count of +the cache, but we will put it during close fd, so this may cause a cache +UAF. + +So grab the cache reference count before fd_install(). In addition, by +kernel convention, fd is taken over by the user land after fd_install(), +and the kernel should not call close_fd() after that, i.e., it should call +fd_install() after everything is ready, thus fd_install() is called after +copy_to_user() succeeds.
+ +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Suggested-by: Hou Tao +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-10-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/cachefiles/ondemand.c | 53 +++++++++++++++++++++++++--------------- + 1 file changed, 33 insertions(+), 20 deletions(-) + +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 773c3b407a33b..a8cfa5047aaf8 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -4,6 +4,11 @@ + #include + #include "internal.h" + ++struct ondemand_anon_file { ++ struct file *file; ++ int fd; ++}; ++ + static inline void cachefiles_req_put(struct cachefiles_req *req) + { + if (refcount_dec_and_test(&req->ref)) +@@ -250,14 +255,14 @@ int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) + return 0; + } + +-static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) ++static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, ++ struct ondemand_anon_file *anon_file) + { + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_open *load; +- struct file *file; + u32 object_id; +- int ret, fd; ++ int ret; + + object = cachefiles_grab_object(req->object, + cachefiles_obj_get_ondemand_fd); +@@ -269,16 +274,16 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + if (ret < 0) + goto err; + +- fd = get_unused_fd_flags(O_WRONLY); +- if (fd < 0) { +- ret = fd; ++ anon_file->fd = get_unused_fd_flags(O_WRONLY); ++ if (anon_file->fd < 0) { ++ ret = anon_file->fd; + goto err_free_id; + } + +- file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, +- object, O_WRONLY); +- if (IS_ERR(file)) { +- ret = PTR_ERR(file); ++ anon_file->file = anon_inode_getfile("[cachefiles]", ++ &cachefiles_ondemand_fd_fops, object, O_WRONLY); ++ if (IS_ERR(anon_file->file)) { 
++ ret = PTR_ERR(anon_file->file); + goto err_put_fd; + } + +@@ -286,16 +291,15 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + if (object->ondemand->ondemand_id > 0) { + spin_unlock(&object->ondemand->lock); + /* Pair with check in cachefiles_ondemand_fd_release(). */ +- file->private_data = NULL; ++ anon_file->file->private_data = NULL; + ret = -EEXIST; + goto err_put_file; + } + +- file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; +- fd_install(fd, file); ++ anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; + + load = (void *)req->msg.data; +- load->fd = fd; ++ load->fd = anon_file->fd; + object->ondemand->ondemand_id = object_id; + spin_unlock(&object->ondemand->lock); + +@@ -304,9 +308,11 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + return 0; + + err_put_file: +- fput(file); ++ fput(anon_file->file); ++ anon_file->file = NULL; + err_put_fd: +- put_unused_fd(fd); ++ put_unused_fd(anon_file->fd); ++ anon_file->fd = ret; + err_free_id: + xa_erase(&cache->ondemand_ids, object_id); + err: +@@ -363,6 +369,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + struct cachefiles_msg *msg; + size_t n; + int ret = 0; ++ struct ondemand_anon_file anon_file; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + + xa_lock(&cache->reqs); +@@ -396,7 +403,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + xa_unlock(&cache->reqs); + + if (msg->opcode == CACHEFILES_OP_OPEN) { +- ret = cachefiles_ondemand_get_fd(req); ++ ret = cachefiles_ondemand_get_fd(req, &anon_file); + if (ret) + goto out; + } +@@ -404,10 +411,16 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + msg->msg_id = xas.xa_index; + msg->object_id = req->object->ondemand->ondemand_id; + +- if (copy_to_user(_buffer, msg, n) != 0) { ++ if (copy_to_user(_buffer, msg, n) != 0) + ret = -EFAULT; +- if (msg->opcode == CACHEFILES_OP_OPEN) +- close_fd(((struct cachefiles_open *)msg->data)->fd); ++ ++ 
if (msg->opcode == CACHEFILES_OP_OPEN) { ++ if (ret < 0) { ++ fput(anon_file.file); ++ put_unused_fd(anon_file.fd); ++ goto out; ++ } ++ fd_install(anon_file.fd, anon_file.file); + } + out: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-extract-ondemand-info-field-from-cachefil.patch b/queue-6.6/cachefiles-extract-ondemand-info-field-from-cachefil.patch new file mode 100644 index 00000000000..263dda76880 --- /dev/null +++ b/queue-6.6/cachefiles-extract-ondemand-info-field-from-cachefil.patch @@ -0,0 +1,206 @@ +From 2eb94239bd3de7da41b73919ecdc70bd158be940 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Nov 2023 12:14:19 +0800 +Subject: cachefiles: extract ondemand info field from cachefiles_object + +From: Jia Zhu + +[ Upstream commit 3c5ecfe16e7699011c12c2d44e55437415331fa3 ] + +We'll introduce a @work_struct field for @object in subsequent patches, +it will enlarge the size of @object. +As the result of that, this commit extracts ondemand info field from +@object. 
+ +Signed-off-by: Jia Zhu +Link: https://lore.kernel.org/r/20231120041422.75170-3-zhujia.zj@bytedance.com +Reviewed-by: Jingbo Xu +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Stable-dep-of: 0a790040838c ("cachefiles: add spin_lock for cachefiles_ondemand_info") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/interface.c | 7 ++++++- + fs/cachefiles/internal.h | 26 ++++++++++++++++++++++---- + fs/cachefiles/ondemand.c | 34 ++++++++++++++++++++++++++++------ + 3 files changed, 56 insertions(+), 11 deletions(-) + +diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c +index 40052bdb33655..35ba2117a6f65 100644 +--- a/fs/cachefiles/interface.c ++++ b/fs/cachefiles/interface.c +@@ -31,6 +31,11 @@ struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie) + if (!object) + return NULL; + ++ if (cachefiles_ondemand_init_obj_info(object, volume)) { ++ kmem_cache_free(cachefiles_object_jar, object); ++ return NULL; ++ } ++ + refcount_set(&object->ref, 1); + + spin_lock_init(&object->lock); +@@ -88,7 +93,7 @@ void cachefiles_put_object(struct cachefiles_object *object, + ASSERTCMP(object->file, ==, NULL); + + kfree(object->d_name); +- ++ cachefiles_ondemand_deinit_obj_info(object); + cache = object->volume->cache->cache; + fscache_put_cookie(object->cookie, fscache_cookie_put_object); + object->cookie = NULL; +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 00beedeaec183..b0fe76964bc0d 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -49,6 +49,12 @@ enum cachefiles_object_state { + CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ + }; + ++struct cachefiles_ondemand_info { ++ int ondemand_id; ++ enum cachefiles_object_state state; ++ struct cachefiles_object *object; ++}; ++ + /* + * Backing file state. 
+ */ +@@ -66,8 +72,7 @@ struct cachefiles_object { + unsigned long flags; + #define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ + #ifdef CONFIG_CACHEFILES_ONDEMAND +- int ondemand_id; +- enum cachefiles_object_state state; ++ struct cachefiles_ondemand_info *ondemand; + #endif + }; + +@@ -302,17 +307,21 @@ extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + ++extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, ++ struct cachefiles_volume *volume); ++extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj); ++ + #define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \ + static inline bool \ + cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ + { \ +- return object->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++ return object->ondemand->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ + } \ + \ + static inline void \ + cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ + { \ +- object->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++ object->ondemand->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ + } + + CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); +@@ -338,6 +347,15 @@ static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + { + return -EOPNOTSUPP; + } ++ ++static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, ++ struct cachefiles_volume *volume) ++{ ++ return 0; ++} ++static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj) ++{ ++} + #endif + + /* +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 90456b8a4b3e0..deb7e3007aa1d 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -9,12 +9,13 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, + { + struct cachefiles_object 
*object = file->private_data; + struct cachefiles_cache *cache = object->volume->cache; +- int object_id = object->ondemand_id; ++ struct cachefiles_ondemand_info *info = object->ondemand; ++ int object_id = info->ondemand_id; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + + xa_lock(&cache->reqs); +- object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; ++ info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); + + /* +@@ -222,7 +223,7 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + load = (void *)req->msg.data; + load->fd = fd; + req->msg.object_id = object_id; +- object->ondemand_id = object_id; ++ object->ondemand->ondemand_id = object_id; + + cachefiles_get_unbind_pincount(cache); + trace_cachefiles_ondemand_open(object, &req->msg, load); +@@ -368,7 +369,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + + if (opcode != CACHEFILES_OP_OPEN && + !cachefiles_ondemand_object_is_open(object)) { +- WARN_ON_ONCE(object->ondemand_id == 0); ++ WARN_ON_ONCE(object->ondemand->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; + goto out; +@@ -438,7 +439,7 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + +- req->msg.object_id = object->ondemand_id; ++ req->msg.object_id = object->ondemand->ondemand_id; + trace_cachefiles_ondemand_close(object, &req->msg); + return 0; + } +@@ -454,7 +455,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + struct cachefiles_object *object = req->object; + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; +- int object_id = object->ondemand_id; ++ int object_id = object->ondemand->ondemand_id; + + /* Stop enqueuing requests when daemon has closed anon_fd. 
*/ + if (!cachefiles_ondemand_object_is_open(object)) { +@@ -500,6 +501,27 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object) + cachefiles_ondemand_init_close_req, NULL); + } + ++int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, ++ struct cachefiles_volume *volume) ++{ ++ if (!cachefiles_in_ondemand_mode(volume->cache)) ++ return 0; ++ ++ object->ondemand = kzalloc(sizeof(struct cachefiles_ondemand_info), ++ GFP_KERNEL); ++ if (!object->ondemand) ++ return -ENOMEM; ++ ++ object->ondemand->object = object; ++ return 0; ++} ++ ++void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) ++{ ++ kfree(object->ondemand); ++ object->ondemand = NULL; ++} ++ + int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) + { +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch b/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch new file mode 100644 index 00000000000..ebfc12ae9e6 --- /dev/null +++ b/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch @@ -0,0 +1,189 @@ +From 38c59ec0f413d10696667fbd5bed9a3a6f7493bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:42:59 +0800 +Subject: cachefiles: fix slab-use-after-free in cachefiles_ondemand_get_fd() + +From: Baokun Li + +[ Upstream commit de3e26f9e5b76fc628077578c001c4a51bf54d06 ] + +We got the following issue in a fuzz test of randomly issuing the restore +command: + +================================================================== +BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0x609/0xab0 +Write of size 4 at addr ffff888109164a80 by task ondemand-04-dae/4962 + +CPU: 11 PID: 4962 Comm: ondemand-04-dae Not tainted 6.8.0-rc7-dirty #542 +Call Trace: + kasan_report+0x94/0xc0 + cachefiles_ondemand_daemon_read+0x609/0xab0 + vfs_read+0x169/0xb50 + ksys_read+0xf5/0x1e0 + +Allocated by task 626: + __kmalloc+0x1df/0x4b0 + 
cachefiles_ondemand_send_req+0x24d/0x690 + cachefiles_create_tmpfile+0x249/0xb30 + cachefiles_create_file+0x6f/0x140 + cachefiles_look_up_object+0x29c/0xa60 + cachefiles_lookup_cookie+0x37d/0xca0 + fscache_cookie_state_machine+0x43c/0x1230 + [...] + +Freed by task 626: + kfree+0xf1/0x2c0 + cachefiles_ondemand_send_req+0x568/0x690 + cachefiles_create_tmpfile+0x249/0xb30 + cachefiles_create_file+0x6f/0x140 + cachefiles_look_up_object+0x29c/0xa60 + cachefiles_lookup_cookie+0x37d/0xca0 + fscache_cookie_state_machine+0x43c/0x1230 + [...] +================================================================== + +Following is the process that triggers the issue: + + mount | daemon_thread1 | daemon_thread2 +------------------------------------------------------------ + cachefiles_ondemand_init_object + cachefiles_ondemand_send_req + REQ_A = kzalloc(sizeof(*req) + data_len) + wait_for_completion(&REQ_A->done) + + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + cachefiles_ondemand_get_fd + copy_to_user(_buffer, msg, n) + process_open_req(REQ_A) + ------ restore ------ + cachefiles_ondemand_restore + xas_for_each(&xas, req, ULONG_MAX) + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + + write(devfd, ("copen %u,%llu", msg->msg_id, size)); + cachefiles_ondemand_copen + xa_erase(&cache->reqs, id) + complete(&REQ_A->done) + kfree(REQ_A) + cachefiles_ondemand_get_fd(REQ_A) + fd = get_unused_fd_flags + file = anon_inode_getfile + fd_install(fd, file) + load = (void *)REQ_A->msg.data; + load->fd = fd; + // load UAF !!! + +This issue is caused by issuing a restore command when the daemon is still +alive, which results in a request being processed multiple times thus +triggering a UAF. 
So to avoid this problem, add an additional reference +count to cachefiles_req, which is held while waiting and reading, and then +released when the waiting and reading is over. + +Note that since there is only one reference count for waiting, we need to +avoid the same request being completed multiple times, so we can only +complete the request if it is successfully removed from the xarray. + +Fixes: e73fa11a356c ("cachefiles: add restore command to recover inflight ondemand read requests") +Suggested-by: Hou Tao +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-4-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Reviewed-by: Jia Zhu +Reviewed-by: Jingbo Xu +Signed-off-by: Christian Brauner +Stable-dep-of: 4b4391e77a6b ("cachefiles: defer exposing anon_fd until after copy_to_user() succeeds") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/internal.h | 1 + + fs/cachefiles/ondemand.c | 23 +++++++++++++++++++---- + 2 files changed, 20 insertions(+), 4 deletions(-) + +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 361356d0e866a..28799c8e2c6f6 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -139,6 +139,7 @@ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) + struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; ++ refcount_t ref; + int error; + struct cachefiles_msg msg; + }; +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 6d8f7f01a73ac..f8d0a01795702 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -4,6 +4,12 @@ + #include + #include "internal.h" + ++static inline void cachefiles_req_put(struct cachefiles_req *req) ++{ ++ if (refcount_dec_and_test(&req->ref)) ++ kfree(req); ++} ++ + static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) + { +@@ -362,6 +368,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + + 
xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; ++ refcount_inc(&req->ref); + xa_unlock(&cache->reqs); + + id = xas.xa_index; +@@ -388,15 +395,22 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + complete(&req->done); + } + ++ cachefiles_req_put(req); + return n; + + err_put_fd: + if (msg->opcode == CACHEFILES_OP_OPEN) + close_fd(((struct cachefiles_open *)msg->data)->fd); + error: +- xa_erase(&cache->reqs, id); +- req->error = ret; +- complete(&req->done); ++ xas_reset(&xas); ++ xas_lock(&xas); ++ if (xas_load(&xas) == req) { ++ req->error = ret; ++ complete(&req->done); ++ xas_store(&xas, NULL); ++ } ++ xas_unlock(&xas); ++ cachefiles_req_put(req); + return ret; + } + +@@ -427,6 +441,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + goto out; + } + ++ refcount_set(&req->ref, 1); + req->object = object; + init_completion(&req->done); + req->msg.opcode = opcode; +@@ -488,7 +503,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + wake_up_all(&cache->daemon_pollwq); + wait_for_completion(&req->done); + ret = req->error; +- kfree(req); ++ cachefiles_req_put(req); + return ret; + out: + /* Reset the object to close state in error handling path. 
+-- +2.43.0 + diff --git a/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch-25104 b/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch-25104 new file mode 100644 index 00000000000..b547541573f --- /dev/null +++ b/queue-6.6/cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch-25104 @@ -0,0 +1,143 @@ +From 5f938b16c5aadb76f041facf14eb623ac63031d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:00 +0800 +Subject: cachefiles: fix slab-use-after-free in + cachefiles_ondemand_daemon_read() + +From: Baokun Li + +[ Upstream commit da4a827416066191aafeeccee50a8836a826ba10 ] + +We got the following issue in a fuzz test of randomly issuing the restore +command: + +================================================================== +BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0xb41/0xb60 +Read of size 8 at addr ffff888122e84088 by task ondemand-04-dae/963 + +CPU: 13 PID: 963 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #564 +Call Trace: + kasan_report+0x93/0xc0 + cachefiles_ondemand_daemon_read+0xb41/0xb60 + vfs_read+0x169/0xb50 + ksys_read+0xf5/0x1e0 + +Allocated by task 116: + kmem_cache_alloc+0x140/0x3a0 + cachefiles_lookup_cookie+0x140/0xcd0 + fscache_cookie_state_machine+0x43c/0x1230 + [...] + +Freed by task 792: + kmem_cache_free+0xfe/0x390 + cachefiles_put_object+0x241/0x480 + fscache_cookie_state_machine+0x5c8/0x1230 + [...] 
+================================================================== + +Following is the process that triggers the issue: + + mount | daemon_thread1 | daemon_thread2 +------------------------------------------------------------ +cachefiles_withdraw_cookie + cachefiles_ondemand_clean_object(object) + cachefiles_ondemand_send_req + REQ_A = kzalloc(sizeof(*req) + data_len) + wait_for_completion(&REQ_A->done) + + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + msg->object_id = req->object->ondemand->ondemand_id + ------ restore ------ + cachefiles_ondemand_restore + xas_for_each(&xas, req, ULONG_MAX) + xas_set_mark(&xas, CACHEFILES_REQ_NEW) + + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + copy_to_user(_buffer, msg, n) + xa_erase(&cache->reqs, id) + complete(&REQ_A->done) + ------ close(fd) ------ + cachefiles_ondemand_fd_release + cachefiles_put_object + cachefiles_put_object + kmem_cache_free(cachefiles_object_jar, object) + REQ_A->object->ondemand->ondemand_id + // object UAF !!! + +When we see the request within xa_lock, req->object must not have been +freed yet, so grab the reference count of object before xa_unlock to +avoid the above issue. 
+ +Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-5-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Reviewed-by: Jia Zhu +Reviewed-by: Jingbo Xu +Signed-off-by: Christian Brauner +Stable-dep-of: 4b4391e77a6b ("cachefiles: defer exposing anon_fd until after copy_to_user() succeeds") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/ondemand.c | 3 +++ + include/trace/events/cachefiles.h | 6 +++++- + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index f8d0a01795702..fd73811c7ce4f 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -369,6 +369,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; + refcount_inc(&req->ref); ++ cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); + xa_unlock(&cache->reqs); + + id = xas.xa_index; +@@ -389,6 +390,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + goto err_put_fd; + } + ++ cachefiles_put_object(req->object, cachefiles_obj_put_read_req); + /* CLOSE request has no reply */ + if (msg->opcode == CACHEFILES_OP_CLOSE) { + xa_erase(&cache->reqs, id); +@@ -402,6 +404,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + if (msg->opcode == CACHEFILES_OP_OPEN) + close_fd(((struct cachefiles_open *)msg->data)->fd); + error: ++ cachefiles_put_object(req->object, cachefiles_obj_put_read_req); + xas_reset(&xas); + xas_lock(&xas); + if (xas_load(&xas) == req) { +diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h +index e3213af847cdf..7d931db02b934 100644 +--- a/include/trace/events/cachefiles.h ++++ b/include/trace/events/cachefiles.h +@@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { + 
cachefiles_obj_see_withdrawal, + cachefiles_obj_get_ondemand_fd, + cachefiles_obj_put_ondemand_fd, ++ cachefiles_obj_get_read_req, ++ cachefiles_obj_put_read_req, + }; + + enum fscache_why_object_killed { +@@ -129,7 +131,9 @@ enum cachefiles_error_trace { + EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ + EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ + EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ +- E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") ++ EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ ++ EM(cachefiles_obj_get_read_req, "GET read_req") \ ++ E_(cachefiles_obj_put_read_req, "PUT read_req") + + #define cachefiles_coherency_traces \ + EM(cachefiles_coherency_check_aux, "BAD aux ") \ +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-flush-all-requests-after-setting-cachefil.patch b/queue-6.6/cachefiles-flush-all-requests-after-setting-cachefil.patch new file mode 100644 index 00000000000..267484bf981 --- /dev/null +++ b/queue-6.6/cachefiles-flush-all-requests-after-setting-cachefil.patch @@ -0,0 +1,71 @@ +From dd4529ddadb57a70dc6a35b448978baceeb9acf4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:07 +0800 +Subject: cachefiles: flush all requests after setting CACHEFILES_DEAD + +From: Baokun Li + +[ Upstream commit 85e833cd7243bda7285492b0653c3abb1e2e757b ] + +In ondemand mode, when the daemon is processing an open request, if the +kernel flags the cache as CACHEFILES_DEAD, the cachefiles_daemon_write() +will always return -EIO, so the daemon can't pass the copen to the kernel. +Then the kernel process that is waiting for the copen triggers a hung_task. + +Since the DEAD state is irreversible, it can only be exited by closing +/dev/cachefiles. Therefore, after calling cachefiles_io_error() to mark +the cache as CACHEFILES_DEAD, if in ondemand mode, flush all requests to +avoid the above hungtask. 
We may still be able to read some of the cached +data before closing the fd of /dev/cachefiles. + +Note that this relies on the patch that adds reference counting to the req, +otherwise it may UAF. + +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-12-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/cachefiles/daemon.c | 2 +- + fs/cachefiles/internal.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c +index 26b487e112596..b9945e4f697be 100644 +--- a/fs/cachefiles/daemon.c ++++ b/fs/cachefiles/daemon.c +@@ -133,7 +133,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) + return 0; + } + +-static void cachefiles_flush_reqs(struct cachefiles_cache *cache) ++void cachefiles_flush_reqs(struct cachefiles_cache *cache) + { + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 28799c8e2c6f6..3eea52462fc87 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -188,6 +188,7 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, + * daemon.c + */ + extern const struct file_operations cachefiles_daemon_fops; ++extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); + extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); + extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); + +@@ -414,6 +415,8 @@ do { \ + pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ + fscache_io_error((___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ ++ if (cachefiles_in_ondemand_mode(___cache)) \ ++ cachefiles_flush_reqs(___cache); \ + } while (0) + + #define cachefiles_io_error_obj(object, FMT, ...) 
\ +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-introduce-object-ondemand-state.patch b/queue-6.6/cachefiles-introduce-object-ondemand-state.patch new file mode 100644 index 00000000000..2cad17f1332 --- /dev/null +++ b/queue-6.6/cachefiles-introduce-object-ondemand-state.patch @@ -0,0 +1,145 @@ +From 74df6cee3497c6c5341685599d9f5362878bc07c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Nov 2023 12:14:18 +0800 +Subject: cachefiles: introduce object ondemand state + +From: Jia Zhu + +[ Upstream commit 357a18d033143617e9c7d420c8f0dd4cbab5f34d ] + +Previously, @ondemand_id field was used not only to identify ondemand +state of the object, but also to represent the index of the xarray. +This commit introduces @state field to decouple the role of @ondemand_id +and adds helpers to access it. + +Signed-off-by: Jia Zhu +Link: https://lore.kernel.org/r/20231120041422.75170-2-zhujia.zj@bytedance.com +Reviewed-by: Jingbo Xu +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Stable-dep-of: 0a790040838c ("cachefiles: add spin_lock for cachefiles_ondemand_info") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/internal.h | 21 +++++++++++++++++++++ + fs/cachefiles/ondemand.c | 21 +++++++++------------ + 2 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index 2ad58c4652084..00beedeaec183 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -44,6 +44,11 @@ struct cachefiles_volume { + struct dentry *fanout[256]; /* Fanout subdirs */ + }; + ++enum cachefiles_object_state { ++ CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ ++ CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ ++}; ++ + /* + * Backing file state. 
+ */ +@@ -62,6 +67,7 @@ struct cachefiles_object { + #define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ + #ifdef CONFIG_CACHEFILES_ONDEMAND + int ondemand_id; ++ enum cachefiles_object_state state; + #endif + }; + +@@ -296,6 +302,21 @@ extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + ++#define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \ ++static inline bool \ ++cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ ++{ \ ++ return object->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++} \ ++ \ ++static inline void \ ++cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ ++{ \ ++ object->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ ++} ++ ++CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); ++CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); + #else + static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 0254ed39f68ce..90456b8a4b3e0 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -15,6 +15,7 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, + + xa_lock(&cache->reqs); + object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; ++ cachefiles_ondemand_set_object_close(object); + + /* + * Flush all pending READ requests since their completion depends on +@@ -176,6 +177,8 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); + trace_cachefiles_ondemand_copen(req->object, id, size); + ++ cachefiles_ondemand_set_object_open(req->object); ++ + out: + complete(&req->done); + return ret; +@@ -363,7 +366,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + /* coupled with the barrier in 
cachefiles_flush_reqs() */ + smp_mb(); + +- if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) { ++ if (opcode != CACHEFILES_OP_OPEN && ++ !cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; +@@ -430,18 +434,11 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + void *private) + { + struct cachefiles_object *object = req->object; +- int object_id = object->ondemand_id; + +- /* +- * It's possible that object id is still 0 if the cookie looking up +- * phase failed before OPEN request has ever been sent. Also avoid +- * sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means +- * anon_fd has already been closed. +- */ +- if (object_id <= 0) ++ if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + +- req->msg.object_id = object_id; ++ req->msg.object_id = object->ondemand_id; + trace_cachefiles_ondemand_close(object, &req->msg); + return 0; + } +@@ -460,7 +457,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + int object_id = object->ondemand_id; + + /* Stop enqueuing requests when daemon has closed anon_fd. */ +- if (object_id <= 0) { ++ if (!cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object_id == 0); + pr_info_once("READ: anonymous fd closed prematurely.\n"); + return -EIO; +@@ -485,7 +482,7 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) + * creating a new tmpfile as the cache file. Reuse the previously + * allocated object ID if any. 
+ */ +- if (object->ondemand_id > 0) ++ if (cachefiles_ondemand_object_is_open(object)) + return 0; + + volume_key_size = volume->key[0] + 1; +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-never-get-a-new-anonymous-fd-if-ondemand_.patch b/queue-6.6/cachefiles-never-get-a-new-anonymous-fd-if-ondemand_.patch new file mode 100644 index 00000000000..f22e6e37fc7 --- /dev/null +++ b/queue-6.6/cachefiles-never-get-a-new-anonymous-fd-if-ondemand_.patch @@ -0,0 +1,166 @@ +From 6da64f507940363f282ec190a807edde15abb9ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:04 +0800 +Subject: cachefiles: never get a new anonymous fd if ondemand_id is valid + +From: Baokun Li + +[ Upstream commit 4988e35e95fc938bdde0e15880fe72042fc86acf ] + +Now every time the daemon reads an open request, it gets a new anonymous fd +and ondemand_id. With the introduction of "restore", it is possible to read +the same open request more than once, and therefore an object can have more +than one anonymous fd. 
+ +If the anonymous fd is not unique, the following concurrencies will result +in an fd leak: + + t1 | t2 | t3 +------------------------------------------------------------ + cachefiles_ondemand_init_object + cachefiles_ondemand_send_req + REQ_A = kzalloc(sizeof(*req) + data_len) + wait_for_completion(&REQ_A->done) + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + cachefiles_ondemand_get_fd + load->fd = fd0 + ondemand_id = object_id0 + ------ restore ------ + cachefiles_ondemand_restore + // restore REQ_A + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + REQ_A = cachefiles_ondemand_select_req + cachefiles_ondemand_get_fd + load->fd = fd1 + ondemand_id = object_id1 + process_open_req(REQ_A) + write(devfd, ("copen %u,%llu", msg->msg_id, size)) + cachefiles_ondemand_copen + xa_erase(&cache->reqs, id) + complete(&REQ_A->done) + kfree(REQ_A) + process_open_req(REQ_A) + // copen fails due to no req + // daemon close(fd1) + cachefiles_ondemand_fd_release + // set object closed + -- umount -- + cachefiles_withdraw_cookie + cachefiles_ondemand_clean_object + cachefiles_ondemand_init_close_req + if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + // The fd0 is not closed until the daemon exits. + +However, the anonymous fd holds the reference count of the object and the +object holds the reference count of the cookie. So even though the cookie +has been relinquished, it will not be unhashed and freed until the daemon +exits. + +In fscache_hash_cookie(), when the same cookie is found in the hash list, +if the cookie is set with the FSCACHE_COOKIE_RELINQUISHED bit, then the new +cookie waits for the old cookie to be unhashed, while the old cookie is +waiting for the leaked fd to be closed, if the daemon does not exit in time +it will trigger a hung task. 
+ +To avoid this, allocate a new anonymous fd only if no anonymous fd has +been allocated (ondemand_id == 0) or if the previously allocated anonymous +fd has been closed (ondemand_id == -1). Moreover, return an error if +ondemand_id is valid, letting the daemon know that the current userland +restore logic is abnormal and needs to be checked. + +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-9-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Signed-off-by: Christian Brauner +Stable-dep-of: 4b4391e77a6b ("cachefiles: defer exposing anon_fd until after copy_to_user() succeeds") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/ondemand.c | 34 ++++++++++++++++++++++++++++------ + 1 file changed, 28 insertions(+), 6 deletions(-) + +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index 99b4bffad4a4f..773c3b407a33b 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -14,11 +14,18 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) + { + struct cachefiles_object *object = file->private_data; +- struct cachefiles_cache *cache = object->volume->cache; +- struct cachefiles_ondemand_info *info = object->ondemand; ++ struct cachefiles_cache *cache; ++ struct cachefiles_ondemand_info *info; + int object_id; + struct cachefiles_req *req; +- XA_STATE(xas, &cache->reqs, 0); ++ XA_STATE(xas, NULL, 0); ++ ++ if (!object) ++ return 0; ++ ++ info = object->ondemand; ++ cache = object->volume->cache; ++ xas.xa = &cache->reqs; + + xa_lock(&cache->reqs); + spin_lock(&info->lock); +@@ -275,22 +282,39 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + goto err_put_fd; + } + ++ spin_lock(&object->ondemand->lock); ++ if (object->ondemand->ondemand_id > 0) { ++ spin_unlock(&object->ondemand->lock); ++ /* Pair with check in cachefiles_ondemand_fd_release(). 
*/ ++ file->private_data = NULL; ++ ret = -EEXIST; ++ goto err_put_file; ++ } ++ + file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; + fd_install(fd, file); + + load = (void *)req->msg.data; + load->fd = fd; + object->ondemand->ondemand_id = object_id; ++ spin_unlock(&object->ondemand->lock); + + cachefiles_get_unbind_pincount(cache); + trace_cachefiles_ondemand_open(object, &req->msg, load); + return 0; + ++err_put_file: ++ fput(file); + err_put_fd: + put_unused_fd(fd); + err_free_id: + xa_erase(&cache->ondemand_ids, object_id); + err: ++ spin_lock(&object->ondemand->lock); ++ /* Avoid marking an opened object as closed. */ ++ if (object->ondemand->ondemand_id <= 0) ++ cachefiles_ondemand_set_object_close(object); ++ spin_unlock(&object->ondemand->lock); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + return ret; + } +@@ -373,10 +397,8 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req); +- if (ret) { +- cachefiles_ondemand_set_object_close(req->object); ++ if (ret) + goto out; +- } + } + + msg->msg_id = xas.xa_index; +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-remove-err_put_fd-label-in-cachefiles_ond.patch b/queue-6.6/cachefiles-remove-err_put_fd-label-in-cachefiles_ond.patch new file mode 100644 index 00000000000..3f98d1caa31 --- /dev/null +++ b/queue-6.6/cachefiles-remove-err_put_fd-label-in-cachefiles_ond.patch @@ -0,0 +1,108 @@ +From ea2bcc15388200fc81350db96ffa976dbb3c3d13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:43:01 +0800 +Subject: cachefiles: remove err_put_fd label in + cachefiles_ondemand_daemon_read() + +From: Baokun Li + +[ Upstream commit 3e6d704f02aa4c50c7bc5fe91a4401df249a137b ] + +The err_put_fd label is only used once, so remove it to make the code +more readable. In addition, the logic for deleting error request and +CLOSE request is merged to simplify the code. 
+ +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-6-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Reviewed-by: Jia Zhu +Reviewed-by: Gao Xiang +Reviewed-by: Jingbo Xu +Signed-off-by: Christian Brauner +Stable-dep-of: 4b4391e77a6b ("cachefiles: defer exposing anon_fd until after copy_to_user() succeeds") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/ondemand.c | 45 ++++++++++++++-------------------------- + 1 file changed, 16 insertions(+), 29 deletions(-) + +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index fd73811c7ce4f..99b4bffad4a4f 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -337,7 +337,6 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + { + struct cachefiles_req *req; + struct cachefiles_msg *msg; +- unsigned long id = 0; + size_t n; + int ret = 0; + XA_STATE(xas, &cache->reqs, cache->req_id_next); +@@ -372,49 +371,37 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); + xa_unlock(&cache->reqs); + +- id = xas.xa_index; +- + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req); + if (ret) { + cachefiles_ondemand_set_object_close(req->object); +- goto error; ++ goto out; + } + } + +- msg->msg_id = id; ++ msg->msg_id = xas.xa_index; + msg->object_id = req->object->ondemand->ondemand_id; + + if (copy_to_user(_buffer, msg, n) != 0) { + ret = -EFAULT; +- goto err_put_fd; +- } +- +- cachefiles_put_object(req->object, cachefiles_obj_put_read_req); +- /* CLOSE request has no reply */ +- if (msg->opcode == CACHEFILES_OP_CLOSE) { +- xa_erase(&cache->reqs, id); +- complete(&req->done); ++ if (msg->opcode == CACHEFILES_OP_OPEN) ++ close_fd(((struct cachefiles_open *)msg->data)->fd); + } +- +- cachefiles_req_put(req); +- return n; +- +-err_put_fd: +- if (msg->opcode == CACHEFILES_OP_OPEN) +- close_fd(((struct cachefiles_open 
*)msg->data)->fd); +-error: ++out: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); +- xas_reset(&xas); +- xas_lock(&xas); +- if (xas_load(&xas) == req) { +- req->error = ret; +- complete(&req->done); +- xas_store(&xas, NULL); ++ /* Remove error request and CLOSE request has no reply */ ++ if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { ++ xas_reset(&xas); ++ xas_lock(&xas); ++ if (xas_load(&xas) == req) { ++ req->error = ret; ++ complete(&req->done); ++ xas_store(&xas, NULL); ++ } ++ xas_unlock(&xas); + } +- xas_unlock(&xas); + cachefiles_req_put(req); +- return ret; ++ return ret ? ret : n; + } + + typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-remove-requests-from-xarray-during-flushi.patch b/queue-6.6/cachefiles-remove-requests-from-xarray-during-flushi.patch new file mode 100644 index 00000000000..b97b7e2ba04 --- /dev/null +++ b/queue-6.6/cachefiles-remove-requests-from-xarray-during-flushi.patch @@ -0,0 +1,62 @@ +From 25bd7b232a20e38f5c9993b74c27193d7ec0cb07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 19:42:58 +0800 +Subject: cachefiles: remove requests from xarray during flushing requests + +From: Baokun Li + +[ Upstream commit 0fc75c5940fa634d84e64c93bfc388e1274ed013 ] + +Even with CACHEFILES_DEAD set, we can still read the requests, so in the +following concurrency the request may be used after it has been freed: + + mount | daemon_thread1 | daemon_thread2 +------------------------------------------------------------ + cachefiles_ondemand_init_object + cachefiles_ondemand_send_req + REQ_A = kzalloc(sizeof(*req) + data_len) + wait_for_completion(&REQ_A->done) + cachefiles_daemon_read + cachefiles_ondemand_daemon_read + // close dev fd + cachefiles_flush_reqs + complete(&REQ_A->done) + kfree(REQ_A) + xa_lock(&cache->reqs); + cachefiles_ondemand_select_req + req->msg.opcode != CACHEFILES_OP_READ + // req use-after-free !!! 
+ xa_unlock(&cache->reqs); + xa_destroy(&cache->reqs) + +Hence remove requests from cache->reqs when flushing them to avoid +accessing freed requests. + +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Baokun Li +Link: https://lore.kernel.org/r/20240522114308.2402121-3-libaokun@huaweicloud.com +Acked-by: Jeff Layton +Reviewed-by: Jia Zhu +Reviewed-by: Gao Xiang +Reviewed-by: Jingbo Xu +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/cachefiles/daemon.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c +index 5f4df9588620f..7d1f456e376dd 100644 +--- a/fs/cachefiles/daemon.c ++++ b/fs/cachefiles/daemon.c +@@ -158,6 +158,7 @@ static void cachefiles_flush_reqs(struct cachefiles_cache *cache) + xa_for_each(xa, index, req) { + req->error = -EIO; + complete(&req->done); ++ __xa_erase(xa, index); + } + xa_unlock(xa); + +-- +2.43.0 + diff --git a/queue-6.6/cachefiles-resend-an-open-request-if-the-read-reques.patch b/queue-6.6/cachefiles-resend-an-open-request-if-the-read-reques.patch new file mode 100644 index 00000000000..3bfe694b3f3 --- /dev/null +++ b/queue-6.6/cachefiles-resend-an-open-request-if-the-read-reques.patch @@ -0,0 +1,272 @@ +From 785ec51b41712f8752486a575d37062701bb03cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Nov 2023 12:14:20 +0800 +Subject: cachefiles: resend an open request if the read request's object is + closed + +From: Jia Zhu + +[ Upstream commit 0a7e54c1959c0feb2de23397ec09c7692364313e ] + +When an anonymous fd is closed by the user daemon and a new read +request for this file comes up, the anonymous fd should be re-opened +to handle that read request rather than fail it directly. + +1. Introduce reopening state for objects that are closed but have + inflight/subsequent read requests. +2. No longer flush READ requests but only CLOSE requests when anonymous + fd is closed. +3. 
Enqueue the reopen work to workqueue, thus user daemon could get rid + of daemon_read context and handle that request smoothly. Otherwise, + the user daemon will send a reopen request and wait for itself to + process the request. + +Signed-off-by: Jia Zhu +Link: https://lore.kernel.org/r/20231120041422.75170-4-zhujia.zj@bytedance.com +Reviewed-by: Jingbo Xu +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Stable-dep-of: 0a790040838c ("cachefiles: add spin_lock for cachefiles_ondemand_info") +Signed-off-by: Sasha Levin +--- + fs/cachefiles/internal.h | 3 ++ + fs/cachefiles/ondemand.c | 98 ++++++++++++++++++++++++++++------------ + 2 files changed, 72 insertions(+), 29 deletions(-) + +diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h +index b0fe76964bc0d..b9a90f1a0c015 100644 +--- a/fs/cachefiles/internal.h ++++ b/fs/cachefiles/internal.h +@@ -47,9 +47,11 @@ struct cachefiles_volume { + enum cachefiles_object_state { + CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ + CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ ++ CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. 
*/ + }; + + struct cachefiles_ondemand_info { ++ struct work_struct ondemand_work; + int ondemand_id; + enum cachefiles_object_state state; + struct cachefiles_object *object; +@@ -326,6 +328,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ + + CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); + CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); ++CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); + #else + static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c +index deb7e3007aa1d..8e130de952f7d 100644 +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -18,14 +18,10 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, + info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); + +- /* +- * Flush all pending READ requests since their completion depends on +- * anon_fd. 
+- */ +- xas_for_each(&xas, req, ULONG_MAX) { ++ /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ ++ xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { + if (req->msg.object_id == object_id && +- req->msg.opcode == CACHEFILES_OP_READ) { +- req->error = -EIO; ++ req->msg.opcode == CACHEFILES_OP_CLOSE) { + complete(&req->done); + xas_store(&xas, NULL); + } +@@ -179,6 +175,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) + trace_cachefiles_ondemand_copen(req->object, id, size); + + cachefiles_ondemand_set_object_open(req->object); ++ wake_up_all(&cache->daemon_pollwq); + + out: + complete(&req->done); +@@ -222,7 +219,6 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + + load = (void *)req->msg.data; + load->fd = fd; +- req->msg.object_id = object_id; + object->ondemand->ondemand_id = object_id; + + cachefiles_get_unbind_pincount(cache); +@@ -238,6 +234,43 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) + return ret; + } + ++static void ondemand_object_worker(struct work_struct *work) ++{ ++ struct cachefiles_ondemand_info *info = ++ container_of(work, struct cachefiles_ondemand_info, ondemand_work); ++ ++ cachefiles_ondemand_init_object(info->object); ++} ++ ++/* ++ * If there are any inflight or subsequent READ requests on the ++ * closed object, reopen it. ++ * Skip read requests whose related object is reopening. 
++ */ ++static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas, ++ unsigned long xa_max) ++{ ++ struct cachefiles_req *req; ++ struct cachefiles_object *object; ++ struct cachefiles_ondemand_info *info; ++ ++ xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) { ++ if (req->msg.opcode != CACHEFILES_OP_READ) ++ return req; ++ object = req->object; ++ info = object->ondemand; ++ if (cachefiles_ondemand_object_is_close(object)) { ++ cachefiles_ondemand_set_object_reopening(object); ++ queue_work(fscache_wq, &info->ondemand_work); ++ continue; ++ } ++ if (cachefiles_ondemand_object_is_reopening(object)) ++ continue; ++ return req; ++ } ++ return NULL; ++} ++ + ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) + { +@@ -248,16 +281,16 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + int ret = 0; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + ++ xa_lock(&cache->reqs); + /* + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. 
+ */ +- xa_lock(&cache->reqs); +- req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); ++ req = cachefiles_ondemand_select_req(&xas, ULONG_MAX); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); +- req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); ++ req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1); + } + if (!req) { + xa_unlock(&cache->reqs); +@@ -277,14 +310,18 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + xa_unlock(&cache->reqs); + + id = xas.xa_index; +- msg->msg_id = id; + + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req); +- if (ret) ++ if (ret) { ++ cachefiles_ondemand_set_object_close(req->object); + goto error; ++ } + } + ++ msg->msg_id = id; ++ msg->object_id = req->object->ondemand->ondemand_id; ++ + if (copy_to_user(_buffer, msg, n) != 0) { + ret = -EFAULT; + goto err_put_fd; +@@ -317,19 +354,23 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + void *private) + { + struct cachefiles_cache *cache = object->volume->cache; +- struct cachefiles_req *req; ++ struct cachefiles_req *req = NULL; + XA_STATE(xas, &cache->reqs, 0); + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + +- if (test_bit(CACHEFILES_DEAD, &cache->flags)) +- return -EIO; ++ if (test_bit(CACHEFILES_DEAD, &cache->flags)) { ++ ret = -EIO; ++ goto out; ++ } + + req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); +- if (!req) +- return -ENOMEM; ++ if (!req) { ++ ret = -ENOMEM; ++ goto out; ++ } + + req->object = object; + init_completion(&req->done); +@@ -367,7 +408,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + /* coupled with the barrier in cachefiles_flush_reqs() */ + smp_mb(); + +- if (opcode != CACHEFILES_OP_OPEN && ++ if (opcode == CACHEFILES_OP_CLOSE && + !cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object->ondemand->ondemand_id == 0); + xas_unlock(&xas); 
+@@ -392,7 +433,15 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + wake_up_all(&cache->daemon_pollwq); + wait_for_completion(&req->done); + ret = req->error; ++ kfree(req); ++ return ret; + out: ++ /* Reset the object to close state in error handling path. ++ * If error occurs after creating the anonymous fd, ++ * cachefiles_ondemand_fd_release() will set object to close. ++ */ ++ if (opcode == CACHEFILES_OP_OPEN) ++ cachefiles_ondemand_set_object_close(object); + kfree(req); + return ret; + } +@@ -439,7 +488,6 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + +- req->msg.object_id = object->ondemand->ondemand_id; + trace_cachefiles_ondemand_close(object, &req->msg); + return 0; + } +@@ -455,16 +503,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + struct cachefiles_object *object = req->object; + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; +- int object_id = object->ondemand->ondemand_id; +- +- /* Stop enqueuing requests when daemon has closed anon_fd. 
*/ +- if (!cachefiles_ondemand_object_is_open(object)) { +- WARN_ON_ONCE(object_id == 0); +- pr_info_once("READ: anonymous fd closed prematurely.\n"); +- return -EIO; +- } + +- req->msg.object_id = object_id; + load->off = read_ctx->off; + load->len = read_ctx->len; + trace_cachefiles_ondemand_read(object, &req->msg, load); +@@ -513,6 +552,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, + return -ENOMEM; + + object->ondemand->object = object; ++ INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.6/clk-sifive-do-not-register-clkdevs-for-prci-clocks.patch b/queue-6.6/clk-sifive-do-not-register-clkdevs-for-prci-clocks.patch new file mode 100644 index 00000000000..3a9fa50ce8a --- /dev/null +++ b/queue-6.6/clk-sifive-do-not-register-clkdevs-for-prci-clocks.patch @@ -0,0 +1,62 @@ +From 589fd7b80ccde05e33a900c83ad89a407a242cb2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 17:14:12 -0700 +Subject: clk: sifive: Do not register clkdevs for PRCI clocks + +From: Samuel Holland + +[ Upstream commit 2607133196c35f31892ee199ce7ffa717bea4ad1 ] + +These clkdevs were unnecessary, because systems using this driver always +look up clocks using the devicetree. And as Russell King points out[1], +since the provided device name was truncated, lookups via clkdev would +never match. + +Recently, commit 8d532528ff6a ("clkdev: report over-sized strings when +creating clkdev entries") caused clkdev registration to fail due to the +truncation, and this now prevents the driver from probing. Fix the +driver by removing the clkdev registration. 
+ +Link: https://lore.kernel.org/linux-clk/ZkfYqj+OcAxd9O2t@shell.armlinux.org.uk/ [1] +Fixes: 30b8e27e3b58 ("clk: sifive: add a driver for the SiFive FU540 PRCI IP block") +Fixes: 8d532528ff6a ("clkdev: report over-sized strings when creating clkdev entries") +Reported-by: Guenter Roeck +Closes: https://lore.kernel.org/linux-clk/7eda7621-0dde-4153-89e4-172e4c095d01@roeck-us.net/ +Suggested-by: Russell King +Signed-off-by: Samuel Holland +Link: https://lore.kernel.org/r/20240528001432.1200403-1-samuel.holland@sifive.com +Signed-off-by: Stephen Boyd +Signed-off-by: Sasha Levin +--- + drivers/clk/sifive/sifive-prci.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c +index af81eb835bc23..b1be6a2d24aa9 100644 +--- a/drivers/clk/sifive/sifive-prci.c ++++ b/drivers/clk/sifive/sifive-prci.c +@@ -4,7 +4,6 @@ + * Copyright (C) 2020 Zong Li + */ + +-#include + #include + #include + #include +@@ -536,13 +535,6 @@ static int __prci_register_clocks(struct device *dev, struct __prci_data *pd, + return r; + } + +- r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev)); +- if (r) { +- dev_warn(dev, "Failed to register clkdev for %s: %d\n", +- init.name, r); +- return r; +- } +- + pd->hw_clks.hws[i] = &pic->hw; + } + +-- +2.43.0 + diff --git a/queue-6.6/cxl-region-fix-memregion-leaks-in-devm_cxl_add_regio.patch b/queue-6.6/cxl-region-fix-memregion-leaks-in-devm_cxl_add_regio.patch new file mode 100644 index 00000000000..dc838ac24b1 --- /dev/null +++ b/queue-6.6/cxl-region-fix-memregion-leaks-in-devm_cxl_add_regio.patch @@ -0,0 +1,61 @@ +From 6e12130bb41516501f951ae8dce5aa3e052dce70 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 May 2024 13:34:21 +0800 +Subject: cxl/region: Fix memregion leaks in devm_cxl_add_region() + +From: Li Zhijian + +[ Upstream commit 49ba7b515c4c0719b866d16f068e62d16a8a3dd1 ] + +Move the mode verification to __create_region() before allocating the +memregion 
to avoid the memregion leaks. + +Fixes: 6e099264185d ("cxl/region: Add volatile region creation support") +Signed-off-by: Li Zhijian +Reviewed-by: Dan Williams +Link: https://lore.kernel.org/r/20240507053421.456439-1-lizhijian@fujitsu.com +Signed-off-by: Dave Jiang +Signed-off-by: Sasha Levin +--- + drivers/cxl/core/region.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c +index c65ab42546238..7a646fed17211 100644 +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -2186,15 +2186,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, + struct device *dev; + int rc; + +- switch (mode) { +- case CXL_DECODER_RAM: +- case CXL_DECODER_PMEM: +- break; +- default: +- dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); +- return ERR_PTR(-EINVAL); +- } +- + cxlr = cxl_region_alloc(cxlrd, id); + if (IS_ERR(cxlr)) + return cxlr; +@@ -2245,6 +2236,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, + { + int rc; + ++ switch (mode) { ++ case CXL_DECODER_RAM: ++ case CXL_DECODER_PMEM: ++ break; ++ default: ++ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); ++ return ERR_PTR(-EINVAL); ++ } ++ + rc = memregion_alloc(GFP_KERNEL); + if (rc < 0) + return ERR_PTR(rc); +-- +2.43.0 + diff --git a/queue-6.6/cxl-test-add-missing-vmalloc.h-for-tools-testing-cxl.patch b/queue-6.6/cxl-test-add-missing-vmalloc.h-for-tools-testing-cxl.patch new file mode 100644 index 00000000000..2e58c173e04 --- /dev/null +++ b/queue-6.6/cxl-test-add-missing-vmalloc.h-for-tools-testing-cxl.patch @@ -0,0 +1,55 @@ +From 4356b3f266364e92e2b27858874ea8a54d75f88d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 15:55:51 -0700 +Subject: cxl/test: Add missing vmalloc.h for tools/testing/cxl/test/mem.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: 
Dave Jiang + +[ Upstream commit d55510527153d17a3af8cc2df69c04f95ae1350d ] + +tools/testing/cxl/test/mem.c uses vmalloc() and vfree() but does not +include linux/vmalloc.h. Kernel v6.10 made changes that causes the +currently included headers not depend on vmalloc.h and therefore +mem.c can no longer compile. Add linux/vmalloc.h to fix compile +issue. + + CC [M] tools/testing/cxl/test/mem.o +tools/testing/cxl/test/mem.c: In function ‘label_area_release’: +tools/testing/cxl/test/mem.c:1428:9: error: implicit declaration of function ‘vfree’; did you mean ‘kvfree’? [-Werror=implicit-function-declaration] + 1428 | vfree(lsa); + | ^~~~~ + | kvfree +tools/testing/cxl/test/mem.c: In function ‘cxl_mock_mem_probe’: +tools/testing/cxl/test/mem.c:1466:22: error: implicit declaration of function ‘vmalloc’; did you mean ‘kmalloc’? [-Werror=implicit-function-declaration] + 1466 | mdata->lsa = vmalloc(LSA_SIZE); + | ^~~~~~~ + | kmalloc + +Fixes: 7d3eb23c4ccf ("tools/testing/cxl: Introduce a mock memory device + driver") +Reviewed-by: Dan Williams +Reviewed-by: Alison Schofield +Link: https://lore.kernel.org/r/20240528225551.1025977-1-dave.jiang@intel.com +Signed-off-by: Dave Jiang +Signed-off-by: Sasha Levin +--- + tools/testing/cxl/test/mem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c +index 68118c37f0b56..0ed100617d993 100644 +--- a/tools/testing/cxl/test/mem.c ++++ b/tools/testing/cxl/test/mem.c +@@ -3,6 +3,7 @@ + + #include + #include ++#include + #include + #include + #include +-- +2.43.0 + diff --git a/queue-6.6/drm-bridge-panel-fix-runtime-warning-on-panel-bridge.patch b/queue-6.6/drm-bridge-panel-fix-runtime-warning-on-panel-bridge.patch new file mode 100644 index 00000000000..93351da862e --- /dev/null +++ b/queue-6.6/drm-bridge-panel-fix-runtime-warning-on-panel-bridge.patch @@ -0,0 +1,51 @@ +From fcd82476168cfbccd7467fa49f0d788cead1d914 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 
2024 11:27:39 +0100 +Subject: drm/bridge/panel: Fix runtime warning on panel bridge release + +From: Adam Miotk + +[ Upstream commit ce62600c4dbee8d43b02277669dd91785a9b81d9 ] + +Device managed panel bridge wrappers are created by calling to +drm_panel_bridge_add_typed() and registering a release handler for +clean-up when the device gets unbound. + +Since the memory for this bridge is also managed and linked to the panel +device, the release function should not try to free that memory. +Moreover, the call to devm_kfree() inside drm_panel_bridge_remove() will +fail in this case and emit a warning because the panel bridge resource +is no longer on the device resources list (it has been removed from +there before the call to release handlers). + +Fixes: 67022227ffb1 ("drm/bridge: Add a devm_ allocator for panel bridge.") +Signed-off-by: Adam Miotk +Signed-off-by: Maxime Ripard +Link: https://patchwork.freedesktop.org/patch/msgid/20240610102739.139852-1-adam.miotk@arm.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/bridge/panel.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c +index 9316384b44745..a1dd2ead8dcc4 100644 +--- a/drivers/gpu/drm/bridge/panel.c ++++ b/drivers/gpu/drm/bridge/panel.c +@@ -360,9 +360,12 @@ EXPORT_SYMBOL(drm_panel_bridge_set_orientation); + + static void devm_drm_panel_bridge_release(struct device *dev, void *res) + { +- struct drm_bridge **bridge = res; ++ struct drm_bridge *bridge = *(struct drm_bridge **)res; + +- drm_panel_bridge_remove(*bridge); ++ if (!bridge) ++ return; ++ ++ drm_bridge_remove(bridge); + } + + /** +-- +2.43.0 + diff --git a/queue-6.6/drm-komeda-check-for-error-valued-pointer.patch b/queue-6.6/drm-komeda-check-for-error-valued-pointer.patch new file mode 100644 index 00000000000..9808c8c8b7d --- /dev/null +++ b/queue-6.6/drm-komeda-check-for-error-valued-pointer.patch @@ -0,0 +1,37 @@ +From 
56f99141f2c18b135a18bc353b956a1431240492 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 11:20:56 +0100 +Subject: drm/komeda: check for error-valued pointer + +From: Amjad Ouled-Ameur + +[ Upstream commit b880018edd3a577e50366338194dee9b899947e0 ] + +komeda_pipeline_get_state() may return an error-valued pointer, thus +check the pointer for negative or null value before dereferencing. + +Fixes: 502932a03fce ("drm/komeda: Add the initial scaler support for CORE") +Signed-off-by: Amjad Ouled-Ameur +Signed-off-by: Maxime Ripard +Link: https://patchwork.freedesktop.org/patch/msgid/20240610102056.40406-1-amjad.ouled-ameur@arm.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +index f3e744172673c..f4e76b46ca327 100644 +--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c ++++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +@@ -259,7 +259,7 @@ komeda_component_get_avail_scaler(struct komeda_component *c, + u32 avail_scalers; + + pipe_st = komeda_pipeline_get_state(c->pipeline, state); +- if (!pipe_st) ++ if (IS_ERR_OR_NULL(pipe_st)) + return NULL; + + avail_scalers = (pipe_st->active_comps & KOMEDA_PIPELINE_SCALERS) ^ +-- +2.43.0 + diff --git a/queue-6.6/drm-panel-sitronix-st7789v-add-check-for-of_drm_get_.patch b/queue-6.6/drm-panel-sitronix-st7789v-add-check-for-of_drm_get_.patch new file mode 100644 index 00000000000..3722d73cf94 --- /dev/null +++ b/queue-6.6/drm-panel-sitronix-st7789v-add-check-for-of_drm_get_.patch @@ -0,0 +1,43 @@ +From abf75a3fba78ed34a414aeadb1afc8fe36e5ebbb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 11:08:32 +0800 +Subject: drm/panel: sitronix-st7789v: Add check for + of_drm_get_panel_orientation + +From: Chen Ni + +[ Upstream commit 
629f2b4e05225e53125aaf7ff0b87d5d53897128 ] + +Add check for the return value of of_drm_get_panel_orientation() and +return the error if it fails in order to catch the error. + +Fixes: b27c0f6d208d ("drm/panel: sitronix-st7789v: add panel orientation support") +Signed-off-by: Chen Ni +Reviewed-by: Michael Riesch +Acked-by: Jessica Zhang +Link: https://lore.kernel.org/r/20240528030832.2529471-1-nichen@iscas.ac.cn +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240528030832.2529471-1-nichen@iscas.ac.cn +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +index e8f385b9c6182..28bfc48a91272 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +@@ -643,7 +643,9 @@ static int st7789v_probe(struct spi_device *spi) + if (ret) + return dev_err_probe(dev, ret, "Failed to get backlight\n"); + +- of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); ++ ret = of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); ++ if (ret) ++ return dev_err_probe(&spi->dev, ret, "Failed to get orientation\n"); + + drm_panel_add(&ctx->panel); + +-- +2.43.0 + diff --git a/queue-6.6/drm-vmwgfx-3d-disabled-should-not-effect-stdu-memory.patch b/queue-6.6/drm-vmwgfx-3d-disabled-should-not-effect-stdu-memory.patch new file mode 100644 index 00000000000..1a55d967f0e --- /dev/null +++ b/queue-6.6/drm-vmwgfx-3d-disabled-should-not-effect-stdu-memory.patch @@ -0,0 +1,45 @@ +From a01f76b3db939bcf16e189f5083b892648b96342 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 13:47:18 -0500 +Subject: drm/vmwgfx: 3D disabled should not effect STDU memory limits + +From: Ian Forbes + +[ Upstream commit fb5e19d2dd03eb995ccd468d599b2337f7f66555 ] + +This limit became a hard cap 
starting with the change referenced below. +Surface creation on the device will fail if the requested size is larger +than this limit so altering the value arbitrarily will expose modes that +are too large for the device's hard limits. + +Fixes: 7ebb47c9f9ab ("drm/vmwgfx: Read new register for GB memory when available") + +Signed-off-by: Ian Forbes +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20240521184720.767-3-ian.forbes@broadcom.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +index 58fb40c93100a..bea576434e475 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +@@ -956,13 +956,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) + vmw_read(dev_priv, + SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB); + +- /* +- * Workaround for low memory 2D VMs to compensate for the +- * allocation taken by fbdev +- */ +- if (!(dev_priv->capabilities & SVGA_CAP_3D)) +- mem_size *= 3; +- + dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; + dev_priv->max_primary_mem = + vmw_read(dev_priv, SVGA_REG_MAX_PRIMARY_MEM); +-- +2.43.0 + diff --git a/queue-6.6/drm-vmwgfx-don-t-memcmp-equivalent-pointers.patch b/queue-6.6/drm-vmwgfx-don-t-memcmp-equivalent-pointers.patch new file mode 100644 index 00000000000..a402cd66e0a --- /dev/null +++ b/queue-6.6/drm-vmwgfx-don-t-memcmp-equivalent-pointers.patch @@ -0,0 +1,38 @@ +From ceddcab7a0ab352dc17fe3c4204a6d723588450f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Mar 2024 14:07:16 -0500 +Subject: drm/vmwgfx: Don't memcmp equivalent pointers + +From: Ian Forbes + +[ Upstream commit 5703fc058efdafcdd6b70776ee562478f0753acb ] + +These pointers are frequently the same and memcmp does not compare the +pointers before comparing their contents so this was wasting cycles +comparing 16 
KiB of memory which will always be equal. + +Fixes: bb6780aa5a1d ("drm/vmwgfx: Diff cursors when using cmds") +Signed-off-by: Ian Forbes +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20240328190716.27367-1-ian.forbes@broadcom.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index 93e2a27daed0c..08f2470edab27 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -216,7 +216,7 @@ static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, + new_image = vmw_du_cursor_plane_acquire_image(new_vps); + + changed = false; +- if (old_image && new_image) ++ if (old_image && new_image && old_image != new_image) + changed = memcmp(old_image, new_image, size) != 0; + + return changed; +-- +2.43.0 + diff --git a/queue-6.6/drm-vmwgfx-filter-modes-which-exceed-graphics-memory.patch b/queue-6.6/drm-vmwgfx-filter-modes-which-exceed-graphics-memory.patch new file mode 100644 index 00000000000..f9232a720dd --- /dev/null +++ b/queue-6.6/drm-vmwgfx-filter-modes-which-exceed-graphics-memory.patch @@ -0,0 +1,106 @@ +From db11500ff2d4a5d581032f22a3146877e45a1076 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 13:47:17 -0500 +Subject: drm/vmwgfx: Filter modes which exceed graphics memory + +From: Ian Forbes + +[ Upstream commit 426826933109093503e7ef15d49348fc5ab505fe ] + +SVGA requires individual surfaces to fit within graphics memory +(max_mob_pages) which means that modes with a final buffer size that would +exceed graphics memory must be pruned otherwise creation will fail. + +Additionally llvmpipe requires its buffer height and width to be a multiple +of its tile size which is 64. 
As a result we have to anticipate that +llvmpipe will round up the mode size passed to it by the compositor when +it creates buffers and filter modes where this rounding exceeds graphics +memory. + +This fixes an issue where VMs with low graphics memory (< 64MiB) configured +with high resolution mode boot to a black screen because surface creation +fails. + +Fixes: d947d1b71deb ("drm/vmwgfx: Add and connect connector helper function") +Signed-off-by: Ian Forbes +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20240521184720.767-2-ian.forbes@broadcom.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 45 ++++++++++++++++++++++++++-- + 1 file changed, 43 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +index 12d623ee59c25..4ccab07faff08 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +@@ -41,7 +41,14 @@ + #define vmw_connector_to_stdu(x) \ + container_of(x, struct vmw_screen_target_display_unit, base.connector) + +- ++/* ++ * Some renderers such as llvmpipe will align the width and height of their ++ * buffers to match their tile size. We need to keep this in mind when exposing ++ * modes to userspace so that this possible over-allocation will not exceed ++ * graphics memory. 64x64 pixels seems to be a reasonable upper bound for the ++ * tile size of current renderers. 
++ */ ++#define GPU_TILE_SIZE 64 + + enum stdu_content_type { + SAME_AS_DISPLAY = 0, +@@ -825,7 +832,41 @@ static void vmw_stdu_connector_destroy(struct drm_connector *connector) + vmw_stdu_destroy(vmw_connector_to_stdu(connector)); + } + ++static enum drm_mode_status ++vmw_stdu_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) ++{ ++ enum drm_mode_status ret; ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ u64 assumed_cpp = dev_priv->assume_16bpp ? 2 : 4; ++ /* Align width and height to account for GPU tile over-alignment */ ++ u64 required_mem = ALIGN(mode->hdisplay, GPU_TILE_SIZE) * ++ ALIGN(mode->vdisplay, GPU_TILE_SIZE) * ++ assumed_cpp; ++ required_mem = ALIGN(required_mem, PAGE_SIZE); ++ ++ ret = drm_mode_validate_size(mode, dev_priv->stdu_max_width, ++ dev_priv->stdu_max_height); ++ if (ret != MODE_OK) ++ return ret; + ++ ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, ++ dev_priv->texture_max_height); ++ if (ret != MODE_OK) ++ return ret; ++ ++ if (required_mem > dev_priv->max_primary_mem) ++ return MODE_MEM; ++ ++ if (required_mem > dev_priv->max_mob_pages * PAGE_SIZE) ++ return MODE_MEM; ++ ++ if (required_mem > dev_priv->max_mob_size) ++ return MODE_MEM; ++ ++ return MODE_OK; ++} + + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + .dpms = vmw_du_connector_dpms, +@@ -841,7 +882,7 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + static const struct + drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { + .get_modes = vmw_connector_get_modes, +- .mode_valid = vmw_connector_mode_valid ++ .mode_valid = vmw_stdu_connector_mode_valid + }; + + +-- +2.43.0 + diff --git a/queue-6.6/drm-vmwgfx-refactor-drm-connector-probing-for-displa.patch b/queue-6.6/drm-vmwgfx-refactor-drm-connector-probing-for-displa.patch new file mode 100644 index 00000000000..006738c521f --- /dev/null +++ 
b/queue-6.6/drm-vmwgfx-refactor-drm-connector-probing-for-displa.patch @@ -0,0 +1,445 @@ +From 5b67809cf8c2cd69c50b97f42d12bb8995fcc5f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jan 2024 15:08:00 -0500 +Subject: drm/vmwgfx: Refactor drm connector probing for display modes + +From: Martin Krastev + +[ Upstream commit 935f795045a6f9b13d28d46ebdad04bfea8750dd ] + +Implement drm_connector_helper_funcs.mode_valid and .get_modes, +replacing custom drm_connector_funcs.fill_modes code with +drm_helper_probe_single_connector_modes; for STDU, LDU & SOU +display units. + +Signed-off-by: Martin Krastev +Reviewed-by: Zack Rusin +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20240126200804.732454-2-zack.rusin@broadcom.com +Stable-dep-of: 426826933109 ("drm/vmwgfx: Filter modes which exceed graphics memory") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 272 +++++++++------------------ + drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 6 +- + drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 5 +- + drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 5 +- + drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 4 +- + 5 files changed, 101 insertions(+), 191 deletions(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index a884072851322..59de170a31853 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + void vmw_du_cleanup(struct vmw_display_unit *du) + { +@@ -2279,107 +2280,6 @@ vmw_du_connector_detect(struct drm_connector *connector, bool force) + connector_status_connected : connector_status_disconnected); + } + +-static struct drm_display_mode vmw_kms_connector_builtin[] = { +- /* 640x480@60Hz */ +- { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656, +- 752, 800, 0, 480, 489, 492, 525, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 800x600@60Hz */ +- { DRM_MODE("800x600", 
DRM_MODE_TYPE_DRIVER, 40000, 800, 840, +- 968, 1056, 0, 600, 601, 605, 628, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1024x768@60Hz */ +- { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048, +- 1184, 1344, 0, 768, 771, 777, 806, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 1152x864@75Hz */ +- { DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216, +- 1344, 1600, 0, 864, 865, 868, 900, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x720@60Hz */ +- { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74500, 1280, 1344, +- 1472, 1664, 0, 720, 723, 728, 748, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x768@60Hz */ +- { DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344, +- 1472, 1664, 0, 768, 771, 778, 798, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x800@60Hz */ +- { DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352, +- 1480, 1680, 0, 800, 803, 809, 831, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 1280x960@60Hz */ +- { DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376, +- 1488, 1800, 0, 960, 961, 964, 1000, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1280x1024@60Hz */ +- { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328, +- 1440, 1688, 0, 1024, 1025, 1028, 1066, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1360x768@60Hz */ +- { DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424, +- 1536, 1792, 0, 768, 771, 777, 795, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1440x1050@60Hz */ +- { DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488, +- 1632, 1864, 0, 1050, 1053, 1057, 1089, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1440x900@60Hz */ +- { DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520, +- 1672, 1904, 0, 900, 903, 909, 934, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1600x1200@60Hz */ +- { 
DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664, +- 1856, 2160, 0, 1200, 1201, 1204, 1250, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1680x1050@60Hz */ +- { DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784, +- 1960, 2240, 0, 1050, 1053, 1059, 1089, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1792x1344@60Hz */ +- { DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920, +- 2120, 2448, 0, 1344, 1345, 1348, 1394, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1853x1392@60Hz */ +- { DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952, +- 2176, 2528, 0, 1392, 1393, 1396, 1439, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1080@60Hz */ +- { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 173000, 1920, 2048, +- 2248, 2576, 0, 1080, 1083, 1088, 1120, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1200@60Hz */ +- { DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056, +- 2256, 2592, 0, 1200, 1203, 1209, 1245, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 1920x1440@60Hz */ +- { DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048, +- 2256, 2600, 0, 1440, 1441, 1444, 1500, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 2560x1440@60Hz */ +- { DRM_MODE("2560x1440", DRM_MODE_TYPE_DRIVER, 241500, 2560, 2608, +- 2640, 2720, 0, 1440, 1443, 1448, 1481, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 2560x1600@60Hz */ +- { DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752, +- 3032, 3504, 0, 1600, 1603, 1609, 1658, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, +- /* 2880x1800@60Hz */ +- { DRM_MODE("2880x1800", DRM_MODE_TYPE_DRIVER, 337500, 2880, 2928, +- 2960, 3040, 0, 1800, 1803, 1809, 1852, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 3840x2160@60Hz */ +- { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 533000, 3840, 3888, +- 3920, 4000, 0, 2160, 2163, 2168, 2222, 0, 
+- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* 3840x2400@60Hz */ +- { DRM_MODE("3840x2400", DRM_MODE_TYPE_DRIVER, 592250, 3840, 3888, +- 3920, 4000, 0, 2400, 2403, 2409, 2469, 0, +- DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, +- /* Terminate */ +- { DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) }, +-}; +- + /** + * vmw_guess_mode_timing - Provide fake timings for a + * 60Hz vrefresh mode. +@@ -2401,88 +2301,6 @@ void vmw_guess_mode_timing(struct drm_display_mode *mode) + } + + +-int vmw_du_connector_fill_modes(struct drm_connector *connector, +- uint32_t max_width, uint32_t max_height) +-{ +- struct vmw_display_unit *du = vmw_connector_to_du(connector); +- struct drm_device *dev = connector->dev; +- struct vmw_private *dev_priv = vmw_priv(dev); +- struct drm_display_mode *mode = NULL; +- struct drm_display_mode *bmode; +- struct drm_display_mode prefmode = { DRM_MODE("preferred", +- DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +- DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) +- }; +- int i; +- u32 assumed_bpp = 4; +- +- if (dev_priv->assume_16bpp) +- assumed_bpp = 2; +- +- max_width = min(max_width, dev_priv->texture_max_width); +- max_height = min(max_height, dev_priv->texture_max_height); +- +- /* +- * For STDU extra limit for a mode on SVGA_REG_SCREENTARGET_MAX_WIDTH/ +- * HEIGHT registers. 
+- */ +- if (dev_priv->active_display_unit == vmw_du_screen_target) { +- max_width = min(max_width, dev_priv->stdu_max_width); +- max_height = min(max_height, dev_priv->stdu_max_height); +- } +- +- /* Add preferred mode */ +- mode = drm_mode_duplicate(dev, &prefmode); +- if (!mode) +- return 0; +- mode->hdisplay = du->pref_width; +- mode->vdisplay = du->pref_height; +- vmw_guess_mode_timing(mode); +- drm_mode_set_name(mode); +- +- if (vmw_kms_validate_mode_vram(dev_priv, +- mode->hdisplay * assumed_bpp, +- mode->vdisplay)) { +- drm_mode_probed_add(connector, mode); +- } else { +- drm_mode_destroy(dev, mode); +- mode = NULL; +- } +- +- if (du->pref_mode) { +- list_del_init(&du->pref_mode->head); +- drm_mode_destroy(dev, du->pref_mode); +- } +- +- /* mode might be null here, this is intended */ +- du->pref_mode = mode; +- +- for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) { +- bmode = &vmw_kms_connector_builtin[i]; +- if (bmode->hdisplay > max_width || +- bmode->vdisplay > max_height) +- continue; +- +- if (!vmw_kms_validate_mode_vram(dev_priv, +- bmode->hdisplay * assumed_bpp, +- bmode->vdisplay)) +- continue; +- +- mode = drm_mode_duplicate(dev, bmode); +- if (!mode) +- return 0; +- +- drm_mode_probed_add(connector, mode); +- } +- +- drm_connector_list_update(connector); +- /* Move the prefered mode first, help apps pick the right mode. */ +- drm_mode_sort(&connector->modes); +- +- return 1; +-} +- + /** + * vmw_kms_update_layout_ioctl - Handler for DRM_VMW_UPDATE_LAYOUT ioctl + * @dev: drm device for the ioctl +@@ -3023,3 +2841,91 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) + vmw_validation_unref_lists(&val_ctx); + return ret; + } ++ ++/** ++ * vmw_connector_mode_valid - implements drm_connector_helper_funcs.mode_valid callback ++ * ++ * @connector: the drm connector, part of a DU container ++ * @mode: drm mode to check ++ * ++ * Returns MODE_OK on success, or a drm_mode_status error code. 
++ */ ++enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) ++{ ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ u32 max_width = dev_priv->texture_max_width; ++ u32 max_height = dev_priv->texture_max_height; ++ u32 assumed_cpp = 4; ++ ++ if (dev_priv->assume_16bpp) ++ assumed_cpp = 2; ++ ++ if (dev_priv->active_display_unit == vmw_du_screen_target) { ++ max_width = min(dev_priv->stdu_max_width, max_width); ++ max_height = min(dev_priv->stdu_max_height, max_height); ++ } ++ ++ if (max_width < mode->hdisplay) ++ return MODE_BAD_HVALUE; ++ ++ if (max_height < mode->vdisplay) ++ return MODE_BAD_VVALUE; ++ ++ if (!vmw_kms_validate_mode_vram(dev_priv, ++ mode->hdisplay * assumed_cpp, ++ mode->vdisplay)) ++ return MODE_MEM; ++ ++ return MODE_OK; ++} ++ ++/** ++ * vmw_connector_get_modes - implements drm_connector_helper_funcs.get_modes callback ++ * ++ * @connector: the drm connector, part of a DU container ++ * ++ * Returns the number of added modes. 
++ */ ++int vmw_connector_get_modes(struct drm_connector *connector) ++{ ++ struct vmw_display_unit *du = vmw_connector_to_du(connector); ++ struct drm_device *dev = connector->dev; ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ struct drm_display_mode *mode = NULL; ++ struct drm_display_mode prefmode = { DRM_MODE("preferred", ++ DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) ++ }; ++ u32 max_width; ++ u32 max_height; ++ u32 num_modes; ++ ++ /* Add preferred mode */ ++ mode = drm_mode_duplicate(dev, &prefmode); ++ if (!mode) ++ return 0; ++ ++ mode->hdisplay = du->pref_width; ++ mode->vdisplay = du->pref_height; ++ vmw_guess_mode_timing(mode); ++ drm_mode_set_name(mode); ++ ++ drm_mode_probed_add(connector, mode); ++ drm_dbg_kms(dev, "preferred mode " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); ++ ++ /* Probe connector for all modes not exceeding our geom limits */ ++ max_width = dev_priv->texture_max_width; ++ max_height = dev_priv->texture_max_height; ++ ++ if (dev_priv->active_display_unit == vmw_du_screen_target) { ++ max_width = min(dev_priv->stdu_max_width, max_width); ++ max_height = min(dev_priv->stdu_max_height, max_height); ++ } ++ ++ num_modes = 1 + drm_add_modes_noedid(connector, max_width, max_height); ++ ++ return num_modes; ++} +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +index 9fda4f4ec7a97..19a843da87b78 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +@@ -378,7 +378,6 @@ struct vmw_display_unit { + unsigned pref_width; + unsigned pref_height; + bool pref_active; +- struct drm_display_mode *pref_mode; + + /* + * Gui positioning +@@ -428,8 +427,6 @@ void vmw_du_connector_save(struct drm_connector *connector); + void vmw_du_connector_restore(struct drm_connector *connector); + enum drm_connector_status + vmw_du_connector_detect(struct drm_connector *connector, bool force); +-int 
vmw_du_connector_fill_modes(struct drm_connector *connector, +- uint32_t max_width, uint32_t max_height); + int vmw_kms_helper_dirty(struct vmw_private *dev_priv, + struct vmw_framebuffer *framebuffer, + const struct drm_clip_rect *clips, +@@ -438,6 +435,9 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, + int num_clips, + int increment, + struct vmw_kms_dirty *dirty); ++enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode); ++int vmw_connector_get_modes(struct drm_connector *connector); + + void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, + struct drm_file *file_priv, +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +index a82fa97003705..c4db4aecca6c3 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +@@ -304,7 +304,7 @@ static void vmw_ldu_connector_destroy(struct drm_connector *connector) + static const struct drm_connector_funcs vmw_legacy_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_ldu_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, +@@ -313,6 +313,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { ++ .get_modes = vmw_connector_get_modes, ++ .mode_valid = vmw_connector_mode_valid + }; + + static int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, +@@ -449,7 +451,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) + ldu->base.pref_active = (unit == 0); + ldu->base.pref_width = dev_priv->initial_width; + ldu->base.pref_height = dev_priv->initial_height; +- ldu->base.pref_mode = NULL; + + /* + * Remove this after enabling atomic 
because property values can +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +index 556a403b7eb56..30c3ad27b6629 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +@@ -347,7 +347,7 @@ static void vmw_sou_connector_destroy(struct drm_connector *connector) + static const struct drm_connector_funcs vmw_sou_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_sou_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, +@@ -357,6 +357,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_sou_connector_helper_funcs = { ++ .get_modes = vmw_connector_get_modes, ++ .mode_valid = vmw_connector_mode_valid + }; + + +@@ -826,7 +828,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) + sou->base.pref_active = (unit == 0); + sou->base.pref_width = dev_priv->initial_width; + sou->base.pref_height = dev_priv->initial_height; +- sou->base.pref_mode = NULL; + + /* + * Remove this after enabling atomic because property values can +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +index ba0c0e12cfe9d..12d623ee59c25 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +@@ -830,7 +830,7 @@ static void vmw_stdu_connector_destroy(struct drm_connector *connector) + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + .dpms = vmw_du_connector_dpms, + .detect = vmw_du_connector_detect, +- .fill_modes = vmw_du_connector_fill_modes, ++ .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = vmw_stdu_connector_destroy, + .reset = vmw_du_connector_reset, + .atomic_duplicate_state = 
vmw_du_connector_duplicate_state, +@@ -840,6 +840,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { + + static const struct + drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { ++ .get_modes = vmw_connector_get_modes, ++ .mode_valid = vmw_connector_mode_valid + }; + + +-- +2.43.0 + diff --git a/queue-6.6/drm-vmwgfx-remove-stdu-logic-from-generic-mode_valid.patch b/queue-6.6/drm-vmwgfx-remove-stdu-logic-from-generic-mode_valid.patch new file mode 100644 index 00000000000..d17334a0c6e --- /dev/null +++ b/queue-6.6/drm-vmwgfx-remove-stdu-logic-from-generic-mode_valid.patch @@ -0,0 +1,93 @@ +From b369d268ed6b2e43725f0b5b2d23de327d3494b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 13:47:19 -0500 +Subject: drm/vmwgfx: Remove STDU logic from generic mode_valid function + +From: Ian Forbes + +[ Upstream commit dde1de06bd7248fd83c4ce5cf0dbe9e4e95bbb91 ] + +STDU has its own mode_valid function now so this logic can be removed from +the generic version. 
+ +Fixes: 935f795045a6 ("drm/vmwgfx: Refactor drm connector probing for display modes") + +Signed-off-by: Ian Forbes +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20240521184720.767-4-ian.forbes@broadcom.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 3 --- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 26 +++++++++----------------- + 2 files changed, 9 insertions(+), 20 deletions(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +index 6acc7ad0e9eb8..13423c7b0cbdb 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +@@ -1067,9 +1067,6 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, + int vmw_kms_write_svga(struct vmw_private *vmw_priv, + unsigned width, unsigned height, unsigned pitch, + unsigned bpp, unsigned depth); +-bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, +- uint32_t pitch, +- uint32_t height); + int vmw_kms_present(struct vmw_private *dev_priv, + struct drm_file *file_priv, + struct vmw_framebuffer *vfb, +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index 59de170a31853..93e2a27daed0c 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -2151,13 +2151,12 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv, + return 0; + } + ++static + bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, +- uint32_t pitch, +- uint32_t height) ++ u64 pitch, ++ u64 height) + { +- return ((u64) pitch * (u64) height) < (u64) +- ((dev_priv->active_display_unit == vmw_du_screen_target) ? 
+- dev_priv->max_primary_mem : dev_priv->vram_size); ++ return (pitch * height) < (u64)dev_priv->vram_size; + } + + /** +@@ -2853,25 +2852,18 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) + enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) + { ++ enum drm_mode_status ret; + struct drm_device *dev = connector->dev; + struct vmw_private *dev_priv = vmw_priv(dev); +- u32 max_width = dev_priv->texture_max_width; +- u32 max_height = dev_priv->texture_max_height; + u32 assumed_cpp = 4; + + if (dev_priv->assume_16bpp) + assumed_cpp = 2; + +- if (dev_priv->active_display_unit == vmw_du_screen_target) { +- max_width = min(dev_priv->stdu_max_width, max_width); +- max_height = min(dev_priv->stdu_max_height, max_height); +- } +- +- if (max_width < mode->hdisplay) +- return MODE_BAD_HVALUE; +- +- if (max_height < mode->vdisplay) +- return MODE_BAD_VVALUE; ++ ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, ++ dev_priv->texture_max_height); ++ if (ret != MODE_OK) ++ return ret; + + if (!vmw_kms_validate_mode_vram(dev_priv, + mode->hdisplay * assumed_cpp, +-- +2.43.0 + diff --git a/queue-6.6/geneve-fix-incorrect-inner-network-header-offset-whe.patch b/queue-6.6/geneve-fix-incorrect-inner-network-header-offset-whe.patch new file mode 100644 index 00000000000..0350195c02b --- /dev/null +++ b/queue-6.6/geneve-fix-incorrect-inner-network-header-offset-whe.patch @@ -0,0 +1,106 @@ +From f558a60861032e2d43b2c6ecd6b2aad18c80ae39 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jun 2024 23:32:48 +0300 +Subject: geneve: Fix incorrect inner network header offset when + innerprotoinherit is set + +From: Gal Pressman + +[ Upstream commit c6ae073f5903f6c6439d0ac855836a4da5c0a701 ] + +When innerprotoinherit is set, the tunneled packets do not have an inner +Ethernet header. +Change 'maclen' to not always assume the header length is ETH_HLEN, as +there might not be a MAC header. 
+ +This resolves issues with drivers (e.g. mlx5, in +mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset +to be correct, and use it for TX offloads. + +Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb") +Signed-off-by: Gal Pressman +Signed-off-by: Tariq Toukan +Reviewed-by: Wojciech Drewek +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/geneve.c | 10 ++++++---- + include/net/ip_tunnels.h | 5 +++-- + 2 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c +index 0a18b67d0d669..8333a5620deff 100644 +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -915,6 +915,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct geneve_dev *geneve, + const struct ip_tunnel_info *info) + { ++ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; + bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + const struct ip_tunnel_key *key = &info->key; +@@ -926,7 +927,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!skb_vlan_inet_prepare(skb)) ++ if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +@@ -999,7 +1000,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, + } + + err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), +- geneve->cfg.inner_proto_inherit); ++ inner_proto_inherit); + if (unlikely(err)) + return err; + +@@ -1015,6 +1016,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct geneve_dev *geneve, + const struct ip_tunnel_info *info) + { ++ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; + bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); + struct geneve_sock *gs6 = 
rcu_dereference(geneve->sock6); + const struct ip_tunnel_key *key = &info->key; +@@ -1024,7 +1026,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + __be16 sport; + int err; + +- if (!skb_vlan_inet_prepare(skb)) ++ if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) + return -EINVAL; + + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); +@@ -1079,7 +1081,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + ttl = ttl ? : ip6_dst_hoplimit(dst); + } + err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), +- geneve->cfg.inner_proto_inherit); ++ inner_proto_inherit); + if (unlikely(err)) + return err; + +diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h +index 822f0fad39623..4e69f52a51177 100644 +--- a/include/net/ip_tunnels.h ++++ b/include/net/ip_tunnels.h +@@ -362,9 +362,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) + + /* Variant of pskb_inet_may_pull(). + */ +-static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) ++static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, ++ bool inner_proto_inherit) + { +- int nhlen = 0, maclen = ETH_HLEN; ++ int nhlen = 0, maclen = inner_proto_inherit ? 
0 : ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) +-- +2.43.0 + diff --git a/queue-6.6/gpio-tqmx86-fix-broken-irq_type_edge_both-interrupt-.patch b/queue-6.6/gpio-tqmx86-fix-broken-irq_type_edge_both-interrupt-.patch new file mode 100644 index 00000000000..805509a35a4 --- /dev/null +++ b/queue-6.6/gpio-tqmx86-fix-broken-irq_type_edge_both-interrupt-.patch @@ -0,0 +1,114 @@ +From 453b3b4ec19760d14b5ef7ce9a9b84fe5a5a4a6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 12:20:02 +0200 +Subject: gpio: tqmx86: fix broken IRQ_TYPE_EDGE_BOTH interrupt type + +From: Matthias Schiffer + +[ Upstream commit 90dd7de4ef7ba584823dfbeba834c2919a4bb55b ] + +The TQMx86 GPIO controller only supports falling and rising edge +triggers, but not both. Fix this by implementing a software both-edge +mode that toggles the edge type after every interrupt. + +Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") +Co-developed-by: Gregor Herburger +Signed-off-by: Gregor Herburger +Signed-off-by: Matthias Schiffer +Link: https://lore.kernel.org/r/515324f0491c4d44f4ef49f170354aca002d81ef.1717063994.git.matthias.schiffer@ew.tq-group.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-tqmx86.c | 46 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 42 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c +index 7e428c872a257..f2e7e8754d95d 100644 +--- a/drivers/gpio/gpio-tqmx86.c ++++ b/drivers/gpio/gpio-tqmx86.c +@@ -32,6 +32,10 @@ + #define TQMX86_GPII_NONE 0 + #define TQMX86_GPII_FALLING BIT(0) + #define TQMX86_GPII_RISING BIT(1) ++/* Stored in irq_type as a trigger type, but not actually valid as a register ++ * value, so the name doesn't use "GPII" ++ */ ++#define TQMX86_INT_BOTH (BIT(0) | BIT(1)) + #define TQMX86_GPII_MASK (BIT(0) | BIT(1)) + #define TQMX86_GPII_BITS 2 + /* Stored in irq_type with GPII bits */ 
+@@ -113,9 +117,15 @@ static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset) + { + u8 type = TQMX86_GPII_NONE, gpiic; + +- if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) ++ if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) { + type = gpio->irq_type[offset] & TQMX86_GPII_MASK; + ++ if (type == TQMX86_INT_BOTH) ++ type = tqmx86_gpio_get(&gpio->chip, offset + TQMX86_NGPO) ++ ? TQMX86_GPII_FALLING ++ : TQMX86_GPII_RISING; ++ } ++ + gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); + gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS)); + gpiic |= type << (offset * TQMX86_GPII_BITS); +@@ -169,7 +179,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) + new_type = TQMX86_GPII_FALLING; + break; + case IRQ_TYPE_EDGE_BOTH: +- new_type = TQMX86_GPII_FALLING | TQMX86_GPII_RISING; ++ new_type = TQMX86_INT_BOTH; + break; + default: + return -EINVAL; /* not supported */ +@@ -189,8 +199,8 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) + struct gpio_chip *chip = irq_desc_get_handler_data(desc); + struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); + struct irq_chip *irq_chip = irq_desc_get_chip(desc); +- unsigned long irq_bits; +- int i = 0; ++ unsigned long irq_bits, flags; ++ int i; + u8 irq_status; + + chained_irq_enter(irq_chip, desc); +@@ -199,6 +209,34 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) + tqmx86_gpio_write(gpio, irq_status, TQMX86_GPIIS); + + irq_bits = irq_status; ++ ++ raw_spin_lock_irqsave(&gpio->spinlock, flags); ++ for_each_set_bit(i, &irq_bits, TQMX86_NGPI) { ++ /* ++ * Edge-both triggers are implemented by flipping the edge ++ * trigger after each interrupt, as the controller only supports ++ * either rising or falling edge triggers, but not both. ++ * ++ * Internally, the TQMx86 GPIO controller has separate status ++ * registers for rising and falling edge interrupts. 
GPIIC ++ * configures which bits from which register are visible in the ++ * interrupt status register GPIIS and defines what triggers the ++ * parent IRQ line. Writing to GPIIS always clears both rising ++ * and falling interrupt flags internally, regardless of the ++ * currently configured trigger. ++ * ++ * In consequence, we can cleanly implement the edge-both ++ * trigger in software by first clearing the interrupt and then ++ * setting the new trigger based on the current GPIO input in ++ * tqmx86_gpio_irq_config() - even if an edge arrives between ++ * reading the input and setting the trigger, we will have a new ++ * interrupt pending. ++ */ ++ if ((gpio->irq_type[i] & TQMX86_GPII_MASK) == TQMX86_INT_BOTH) ++ tqmx86_gpio_irq_config(gpio, i); ++ } ++ raw_spin_unlock_irqrestore(&gpio->spinlock, flags); ++ + for_each_set_bit(i, &irq_bits, TQMX86_NGPI) + generic_handle_domain_irq(gpio->chip.irq.domain, + i + TQMX86_NGPO); +-- +2.43.0 + diff --git a/queue-6.6/gpio-tqmx86-fix-typo-in-kconfig-label.patch b/queue-6.6/gpio-tqmx86-fix-typo-in-kconfig-label.patch new file mode 100644 index 00000000000..41ec15b8b22 --- /dev/null +++ b/queue-6.6/gpio-tqmx86-fix-typo-in-kconfig-label.patch @@ -0,0 +1,38 @@ +From 4a7a863eb1cef2be55dd391ee93f5bb0afa38cdf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 12:19:59 +0200 +Subject: gpio: tqmx86: fix typo in Kconfig label + +From: Gregor Herburger + +[ Upstream commit 8c219e52ca4d9a67cd6a7074e91bf29b55edc075 ] + +Fix description for GPIO_TQMX86 from QTMX86 to TQMx86. 
+ +Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") +Signed-off-by: Gregor Herburger +Signed-off-by: Matthias Schiffer +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/e0e38c9944ad6d281d9a662a45d289b88edc808e.1717063994.git.matthias.schiffer@ew.tq-group.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig +index d56b835359d3b..ebd4e113dc265 100644 +--- a/drivers/gpio/Kconfig ++++ b/drivers/gpio/Kconfig +@@ -1507,7 +1507,7 @@ config GPIO_TPS68470 + are "output only" GPIOs. + + config GPIO_TQMX86 +- tristate "TQ-Systems QTMX86 GPIO" ++ tristate "TQ-Systems TQMx86 GPIO" + depends on MFD_TQMX86 || COMPILE_TEST + depends on HAS_IOPORT_MAP + select GPIOLIB_IRQCHIP +-- +2.43.0 + diff --git a/queue-6.6/gpio-tqmx86-introduce-shadow-register-for-gpio-outpu.patch b/queue-6.6/gpio-tqmx86-introduce-shadow-register-for-gpio-outpu.patch new file mode 100644 index 00000000000..432240b6e23 --- /dev/null +++ b/queue-6.6/gpio-tqmx86-introduce-shadow-register-for-gpio-outpu.patch @@ -0,0 +1,89 @@ +From 504a5d2b0e9d248a6b7e6bf56f845f106c3e6067 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 12:20:00 +0200 +Subject: gpio: tqmx86: introduce shadow register for GPIO output value + +From: Matthias Schiffer + +[ Upstream commit 9d6a811b522ba558bcb4ec01d12e72a0af8e9f6e ] + +The TQMx86 GPIO controller uses the same register address for input and +output data. Reading the register will always return current inputs +rather than the previously set outputs (regardless of the current +direction setting). Therefore, using a RMW pattern does not make sense +when setting output values. Instead, the previously set output register +value needs to be stored as a shadow register. 
+ +As there is no reliable way to get the current output values from the +hardware, also initialize all channels to 0, to ensure that stored and +actual output values match. This should usually not have any effect in +practise, as the TQMx86 UEFI sets all outputs to 0 during boot. + +Also prepare for extension of the driver to more than 8 GPIOs by using +DECLARE_BITMAP. + +Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") +Signed-off-by: Matthias Schiffer +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/d0555933becd45fa92a85675d26e4d59343ddc01.1717063994.git.matthias.schiffer@ew.tq-group.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-tqmx86.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c +index 3a28c1f273c39..b7e2dbbdc4ebe 100644 +--- a/drivers/gpio/gpio-tqmx86.c ++++ b/drivers/gpio/gpio-tqmx86.c +@@ -6,6 +6,7 @@ + * Vadim V.Vlasov + */ + ++#include + #include + #include + #include +@@ -38,6 +39,7 @@ struct tqmx86_gpio_data { + void __iomem *io_base; + int irq; + raw_spinlock_t spinlock; ++ DECLARE_BITMAP(output, TQMX86_NGPIO); + u8 irq_type[TQMX86_NGPI]; + }; + +@@ -64,15 +66,10 @@ static void tqmx86_gpio_set(struct gpio_chip *chip, unsigned int offset, + { + struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); + unsigned long flags; +- u8 val; + + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- val = tqmx86_gpio_read(gpio, TQMX86_GPIOD); +- if (value) +- val |= BIT(offset); +- else +- val &= ~BIT(offset); +- tqmx86_gpio_write(gpio, val, TQMX86_GPIOD); ++ __assign_bit(offset, gpio->output, value); ++ tqmx86_gpio_write(gpio, bitmap_get_value8(gpio->output, 0), TQMX86_GPIOD); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + } + +@@ -277,6 +274,13 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) + + tqmx86_gpio_write(gpio, (u8)~TQMX86_DIR_INPUT_MASK, 
TQMX86_GPIODD); + ++ /* ++ * Reading the previous output state is not possible with TQMx86 hardware. ++ * Initialize all outputs to 0 to have a defined state that matches the ++ * shadow register. ++ */ ++ tqmx86_gpio_write(gpio, 0, TQMX86_GPIOD); ++ + chip = &gpio->chip; + chip->label = "gpio-tqmx86"; + chip->owner = THIS_MODULE; +-- +2.43.0 + diff --git a/queue-6.6/gpio-tqmx86-store-irq-trigger-type-and-unmask-status.patch b/queue-6.6/gpio-tqmx86-store-irq-trigger-type-and-unmask-status.patch new file mode 100644 index 00000000000..caa080c6b9a --- /dev/null +++ b/queue-6.6/gpio-tqmx86-store-irq-trigger-type-and-unmask-status.patch @@ -0,0 +1,137 @@ +From fc61bc239bf78581b607b8055e49d75a37377b75 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 12:20:01 +0200 +Subject: gpio: tqmx86: store IRQ trigger type and unmask status separately + +From: Matthias Schiffer + +[ Upstream commit 08af509efdf8dad08e972b48de0e2c2a7919ea8b ] + +irq_set_type() should not implicitly unmask the IRQ. + +All accesses to the interrupt configuration register are moved to a new +helper tqmx86_gpio_irq_config(). We also introduce the new rule that +accessing irq_type must happen while locked, which will become +significant for fixing EDGE_BOTH handling. 
+ +Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") +Signed-off-by: Matthias Schiffer +Link: https://lore.kernel.org/r/6aa4f207f77cb58ef64ffb947e91949b0f753ccd.1717063994.git.matthias.schiffer@ew.tq-group.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-tqmx86.c | 48 ++++++++++++++++++++++---------------- + 1 file changed, 28 insertions(+), 20 deletions(-) + +diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c +index b7e2dbbdc4ebe..7e428c872a257 100644 +--- a/drivers/gpio/gpio-tqmx86.c ++++ b/drivers/gpio/gpio-tqmx86.c +@@ -29,15 +29,19 @@ + #define TQMX86_GPIIC 3 /* GPI Interrupt Configuration Register */ + #define TQMX86_GPIIS 4 /* GPI Interrupt Status Register */ + ++#define TQMX86_GPII_NONE 0 + #define TQMX86_GPII_FALLING BIT(0) + #define TQMX86_GPII_RISING BIT(1) + #define TQMX86_GPII_MASK (BIT(0) | BIT(1)) + #define TQMX86_GPII_BITS 2 ++/* Stored in irq_type with GPII bits */ ++#define TQMX86_INT_UNMASKED BIT(2) + + struct tqmx86_gpio_data { + struct gpio_chip chip; + void __iomem *io_base; + int irq; ++ /* Lock must be held for accessing output and irq_type fields */ + raw_spinlock_t spinlock; + DECLARE_BITMAP(output, TQMX86_NGPIO); + u8 irq_type[TQMX86_NGPI]; +@@ -104,21 +108,32 @@ static int tqmx86_gpio_get_direction(struct gpio_chip *chip, + return GPIO_LINE_DIRECTION_OUT; + } + ++static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset) ++ __must_hold(&gpio->spinlock) ++{ ++ u8 type = TQMX86_GPII_NONE, gpiic; ++ ++ if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) ++ type = gpio->irq_type[offset] & TQMX86_GPII_MASK; ++ ++ gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); ++ gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS)); ++ gpiic |= type << (offset * TQMX86_GPII_BITS); ++ tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++} ++ + static void tqmx86_gpio_irq_mask(struct irq_data *data) + { + unsigned int offset = (data->hwirq - TQMX86_NGPO); + 
struct tqmx86_gpio_data *gpio = gpiochip_get_data( + irq_data_get_irq_chip_data(data)); + unsigned long flags; +- u8 gpiic, mask; +- +- mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); + + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~mask; +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] &= ~TQMX86_INT_UNMASKED; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); ++ + gpiochip_disable_irq(&gpio->chip, irqd_to_hwirq(data)); + } + +@@ -128,16 +143,12 @@ static void tqmx86_gpio_irq_unmask(struct irq_data *data) + struct tqmx86_gpio_data *gpio = gpiochip_get_data( + irq_data_get_irq_chip_data(data)); + unsigned long flags; +- u8 gpiic, mask; +- +- mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); + + gpiochip_enable_irq(&gpio->chip, irqd_to_hwirq(data)); ++ + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~mask; +- gpiic |= gpio->irq_type[offset] << (offset * TQMX86_GPII_BITS); +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] |= TQMX86_INT_UNMASKED; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + } + +@@ -148,7 +159,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) + unsigned int offset = (data->hwirq - TQMX86_NGPO); + unsigned int edge_type = type & IRQF_TRIGGER_MASK; + unsigned long flags; +- u8 new_type, gpiic; ++ u8 new_type; + + switch (edge_type) { + case IRQ_TYPE_EDGE_RISING: +@@ -164,13 +175,10 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) + return -EINVAL; /* not supported */ + } + +- gpio->irq_type[offset] = new_type; +- + raw_spin_lock_irqsave(&gpio->spinlock, flags); +- gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); +- gpiic &= ~((TQMX86_GPII_MASK) << (offset * TQMX86_GPII_BITS)); +- gpiic |= new_type << (offset * 
TQMX86_GPII_BITS); +- tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); ++ gpio->irq_type[offset] &= ~TQMX86_GPII_MASK; ++ gpio->irq_type[offset] |= new_type; ++ tqmx86_gpio_irq_config(gpio, offset); + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + + return 0; +-- +2.43.0 + diff --git a/queue-6.6/gve-ignore-nonrelevant-gso-type-bits-when-processing.patch b/queue-6.6/gve-ignore-nonrelevant-gso-type-bits-when-processing.patch new file mode 100644 index 00000000000..d6828939064 --- /dev/null +++ b/queue-6.6/gve-ignore-nonrelevant-gso-type-bits-when-processing.patch @@ -0,0 +1,86 @@ +From 397d4308fa0aceb2e64a03c13f866eaa9acc1866 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 15:57:18 -0700 +Subject: gve: ignore nonrelevant GSO type bits when processing TSO headers + +From: Joshua Washington + +[ Upstream commit 1b9f756344416e02b41439bf2324b26aa25e141c ] + +TSO currently fails when the skb's gso_type field has more than one bit +set. + +TSO packets can be passed from userspace using PF_PACKET, TUNTAP and a +few others, using virtio_net_hdr (e.g., PACKET_VNET_HDR). This includes +virtualization, such as QEMU, a real use-case. + +The gso_type and gso_size fields as passed from userspace in +virtio_net_hdr are not trusted blindly by the kernel. It adds gso_type +|= SKB_GSO_DODGY to force the packet to enter the software GSO stack +for verification. + +This issue might similarly come up when the CWR bit is set in the TCP +header for congestion control, causing the SKB_GSO_TCP_ECN gso_type bit +to be set. + +Fixes: a57e5de476be ("gve: DQO: Add TX path") +Signed-off-by: Joshua Washington +Reviewed-by: Praveen Kaligineedi +Reviewed-by: Harshitha Ramamurthy +Reviewed-by: Willem de Bruijn +Suggested-by: Eric Dumazet +Acked-by: Andrei Vagin + +v2 - Remove unnecessary comments, remove line break between fixes tag +and signoffs. + +v3 - Add back unrelated empty line removal. 
+ +Link: https://lore.kernel.org/r/20240610225729.2985343-1-joshwash@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/google/gve/gve_tx_dqo.c | 20 +++++--------------- + 1 file changed, 5 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +index 1e19b834a6130..5a44354bbdfdf 100644 +--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +@@ -501,28 +501,18 @@ static int gve_prep_tso(struct sk_buff *skb) + if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) + return -1; + ++ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) ++ return -EINVAL; ++ + /* Needed because we will modify header. */ + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + tcp = tcp_hdr(skb); +- +- /* Remove payload length from checksum. */ + paylen = skb->len - skb_transport_offset(skb); +- +- switch (skb_shinfo(skb)->gso_type) { +- case SKB_GSO_TCPV4: +- case SKB_GSO_TCPV6: +- csum_replace_by_diff(&tcp->check, +- (__force __wsum)htonl(paylen)); +- +- /* Compute length of segmentation header. 
*/ +- header_len = skb_tcp_all_headers(skb); +- break; +- default: +- return -EINVAL; +- } ++ csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); ++ header_len = skb_tcp_all_headers(skb); + + if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) + return -EINVAL; +-- +2.43.0 + diff --git a/queue-6.6/hid-core-remove-unnecessary-warn_on-in-implement.patch b/queue-6.6/hid-core-remove-unnecessary-warn_on-in-implement.patch new file mode 100644 index 00000000000..aed9c11b7f3 --- /dev/null +++ b/queue-6.6/hid-core-remove-unnecessary-warn_on-in-implement.patch @@ -0,0 +1,67 @@ +From 269e1e19bf8c0cba225d987c98041321310907bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 07:19:14 -0700 +Subject: HID: core: remove unnecessary WARN_ON() in implement() + +From: Nikita Zhandarovich + +[ Upstream commit 4aa2dcfbad538adf7becd0034a3754e1bd01b2b5 ] + +Syzkaller hit a warning [1] in a call to implement() when trying +to write a value into a field of smaller size in an output report. + +Since implement() already has a warn message printed out with the +help of hid_warn() and value in question gets trimmed with: + ... + value &= m; + ... +WARN_ON may be considered superfluous. Remove it to suppress future +syzkaller triggers. + +[1] +WARNING: CPU: 0 PID: 5084 at drivers/hid/hid-core.c:1451 implement drivers/hid/hid-core.c:1451 [inline] +WARNING: CPU: 0 PID: 5084 at drivers/hid/hid-core.c:1451 hid_output_report+0x548/0x760 drivers/hid/hid-core.c:1863 +Modules linked in: +CPU: 0 PID: 5084 Comm: syz-executor424 Not tainted 6.9.0-rc7-syzkaller-00183-gcf87f46fd34d #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024 +RIP: 0010:implement drivers/hid/hid-core.c:1451 [inline] +RIP: 0010:hid_output_report+0x548/0x760 drivers/hid/hid-core.c:1863 +... 
+Call Trace: + + __usbhid_submit_report drivers/hid/usbhid/hid-core.c:591 [inline] + usbhid_submit_report+0x43d/0x9e0 drivers/hid/usbhid/hid-core.c:636 + hiddev_ioctl+0x138b/0x1f00 drivers/hid/usbhid/hiddev.c:726 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:904 [inline] + __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:890 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +... + +Fixes: 95d1c8951e5b ("HID: simplify implement() a bit") +Reported-by: +Suggested-by: Alan Stern +Signed-off-by: Nikita Zhandarovich +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-core.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index e0181218ad857..85ddeb13a3fae 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1448,7 +1448,6 @@ static void implement(const struct hid_device *hid, u8 *report, + hid_warn(hid, + "%s() called with too large value %d (n: %d)! (%s)\n", + __func__, value, n, current->comm); +- WARN_ON(1); + value &= m; + } + } +-- +2.43.0 + diff --git a/queue-6.6/hid-logitech-dj-fix-memory-leak-in-logi_dj_recv_swit.patch b/queue-6.6/hid-logitech-dj-fix-memory-leak-in-logi_dj_recv_swit.patch new file mode 100644 index 00000000000..684a8e9dbc5 --- /dev/null +++ b/queue-6.6/hid-logitech-dj-fix-memory-leak-in-logi_dj_recv_swit.patch @@ -0,0 +1,41 @@ +From 64a08954c429c6de2804cc43844b956a5f9becc9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 15:05:39 +0200 +Subject: HID: logitech-dj: Fix memory leak in logi_dj_recv_switch_to_dj_mode() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: José Expósito + +[ Upstream commit ce3af2ee95170b7d9e15fff6e500d67deab1e7b3 ] + +Fix a memory leak on logi_dj_recv_send_report() error path. 
+ +Fixes: 6f20d3261265 ("HID: logitech-dj: Fix error handling in logi_dj_recv_switch_to_dj_mode()") +Signed-off-by: José Expósito +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-logitech-dj.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c +index 3c3c497b6b911..37958edec55f5 100644 +--- a/drivers/hid/hid-logitech-dj.c ++++ b/drivers/hid/hid-logitech-dj.c +@@ -1284,8 +1284,10 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, + */ + msleep(50); + +- if (retval) ++ if (retval) { ++ kfree(dj_report); + return retval; ++ } + } + + /* +-- +2.43.0 + diff --git a/queue-6.6/hid-nvidia-shield-add-missing-check-for-input_ff_cre.patch b/queue-6.6/hid-nvidia-shield-add-missing-check-for-input_ff_cre.patch new file mode 100644 index 00000000000..13f64d6fb0c --- /dev/null +++ b/queue-6.6/hid-nvidia-shield-add-missing-check-for-input_ff_cre.patch @@ -0,0 +1,39 @@ +From d915eeb5490fe2fa38d08b1cd6d0f44092c1e169 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2024 11:30:51 +0800 +Subject: HID: nvidia-shield: Add missing check for input_ff_create_memless + +From: Chen Ni + +[ Upstream commit 0a3f9f7fc59feb8a91a2793b8b60977895c72365 ] + +Add check for the return value of input_ff_create_memless() and return +the error if it fails in order to catch the error. 
+ +Fixes: 09308562d4af ("HID: nvidia-shield: Initial driver implementation with Thunderstrike support") +Signed-off-by: Chen Ni +Reviewed-by: Rahul Rameshbabu +Signed-off-by: Jiri Kosina +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-nvidia-shield.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c +index edd0b0f1193bd..97dfa3694ff04 100644 +--- a/drivers/hid/hid-nvidia-shield.c ++++ b/drivers/hid/hid-nvidia-shield.c +@@ -283,7 +283,9 @@ static struct input_dev *shield_haptics_create( + return haptics; + + input_set_capability(haptics, EV_FF, FF_RUMBLE); +- input_ff_create_memless(haptics, NULL, play_effect); ++ ret = input_ff_create_memless(haptics, NULL, play_effect); ++ if (ret) ++ goto err; + + ret = input_register_device(haptics); + if (ret) +-- +2.43.0 + diff --git a/queue-6.6/io_uring-io-wq-avoid-garbage-value-of-match-in-io_wq.patch b/queue-6.6/io_uring-io-wq-avoid-garbage-value-of-match-in-io_wq.patch new file mode 100644 index 00000000000..e08d9139b7b --- /dev/null +++ b/queue-6.6/io_uring-io-wq-avoid-garbage-value-of-match-in-io_wq.patch @@ -0,0 +1,58 @@ +From 1b888041099f7e9d35732313db0c293e8dff08d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Jun 2024 20:12:43 +0800 +Subject: io_uring/io-wq: avoid garbage value of 'match' in io_wq_enqueue() + +From: Su Hui + +[ Upstream commit 91215f70ea8541e9011c0b48f8b59b9e0ce6953b ] + +Clang static checker (scan-build) warning: +io_uring/io-wq.c:line 1051, column 3 +The expression is an uninitialized value. The computed value will +also be garbage. + +'match.nr_pending' is used in io_acct_cancel_pending_work(), but it is +not fully initialized. Change the order of assignment for 'match' to fix +this problem. 
+ +Fixes: 42abc95f05bf ("io-wq: decouple work_list protection from the big wqe->lock") +Signed-off-by: Su Hui +Link: https://lore.kernel.org/r/20240604121242.2661244-1-suhui@nfschina.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io-wq.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c +index 4a07742349048..8a99aabcac2c3 100644 +--- a/io_uring/io-wq.c ++++ b/io_uring/io-wq.c +@@ -929,7 +929,11 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) + { + struct io_wq_acct *acct = io_work_get_acct(wq, work); + unsigned long work_flags = work->flags; +- struct io_cb_cancel_data match; ++ struct io_cb_cancel_data match = { ++ .fn = io_wq_work_match_item, ++ .data = work, ++ .cancel_all = false, ++ }; + bool do_create; + + /* +@@ -967,10 +971,6 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) + raw_spin_unlock(&wq->lock); + + /* fatal condition, failed to create the first worker */ +- match.fn = io_wq_work_match_item, +- match.data = work, +- match.cancel_all = false, +- + io_acct_cancel_pending_work(wq, acct, &match); + } + } +-- +2.43.0 + diff --git a/queue-6.6/io_uring-io-wq-use-set_bit-and-test_bit-at-worker-fl.patch b/queue-6.6/io_uring-io-wq-use-set_bit-and-test_bit-at-worker-fl.patch new file mode 100644 index 00000000000..a20c53981b0 --- /dev/null +++ b/queue-6.6/io_uring-io-wq-use-set_bit-and-test_bit-at-worker-fl.patch @@ -0,0 +1,209 @@ +From 5ae96266cf2a4aaa2222d71134bea60787594826 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 May 2024 10:00:01 -0700 +Subject: io_uring/io-wq: Use set_bit() and test_bit() at worker->flags + +From: Breno Leitao + +[ Upstream commit 8a565304927fbd28c9f028c492b5c1714002cbab ] + +Utilize set_bit() and test_bit() on worker->flags within io_uring/io-wq +to address potential data races. + +The structure io_worker->flags may be accessed through various data +paths, leading to concurrency issues. 
When KCSAN is enabled, it reveals +data races occurring in io_worker_handle_work and +io_wq_activate_free_worker functions. + + BUG: KCSAN: data-race in io_worker_handle_work / io_wq_activate_free_worker + write to 0xffff8885c4246404 of 4 bytes by task 49071 on cpu 28: + io_worker_handle_work (io_uring/io-wq.c:434 io_uring/io-wq.c:569) + io_wq_worker (io_uring/io-wq.c:?) + + + read to 0xffff8885c4246404 of 4 bytes by task 49024 on cpu 5: + io_wq_activate_free_worker (io_uring/io-wq.c:? io_uring/io-wq.c:285) + io_wq_enqueue (io_uring/io-wq.c:947) + io_queue_iowq (io_uring/io_uring.c:524) + io_req_task_submit (io_uring/io_uring.c:1511) + io_handle_tw_list (io_uring/io_uring.c:1198) + + +Line numbers against commit 18daea77cca6 ("Merge tag 'for-linus' of +git://git.kernel.org/pub/scm/virt/kvm/kvm"). + +These races involve writes and reads to the same memory location by +different tasks running on different CPUs. To mitigate this, refactor +the code to use atomic operations such as set_bit(), test_bit(), and +clear_bit() instead of basic "and" and "or" operations. This ensures +thread-safe manipulation of worker flags. + +Also, move `create_index` to avoid holes in the structure. 
+ +Signed-off-by: Breno Leitao +Link: https://lore.kernel.org/r/20240507170002.2269003-1-leitao@debian.org +Signed-off-by: Jens Axboe +Stable-dep-of: 91215f70ea85 ("io_uring/io-wq: avoid garbage value of 'match' in io_wq_enqueue()") +Signed-off-by: Sasha Levin +--- + io_uring/io-wq.c | 47 ++++++++++++++++++++++++----------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c +index 318ed067dbf64..4a07742349048 100644 +--- a/io_uring/io-wq.c ++++ b/io_uring/io-wq.c +@@ -25,10 +25,10 @@ + #define WORKER_IDLE_TIMEOUT (5 * HZ) + + enum { +- IO_WORKER_F_UP = 1, /* up and active */ +- IO_WORKER_F_RUNNING = 2, /* account as running */ +- IO_WORKER_F_FREE = 4, /* worker on free list */ +- IO_WORKER_F_BOUND = 8, /* is doing bounded work */ ++ IO_WORKER_F_UP = 0, /* up and active */ ++ IO_WORKER_F_RUNNING = 1, /* account as running */ ++ IO_WORKER_F_FREE = 2, /* worker on free list */ ++ IO_WORKER_F_BOUND = 3, /* is doing bounded work */ + }; + + enum { +@@ -44,7 +44,8 @@ enum { + */ + struct io_worker { + refcount_t ref; +- unsigned flags; ++ int create_index; ++ unsigned long flags; + struct hlist_nulls_node nulls_node; + struct list_head all_list; + struct task_struct *task; +@@ -58,7 +59,6 @@ struct io_worker { + + unsigned long create_state; + struct callback_head create_work; +- int create_index; + + union { + struct rcu_head rcu; +@@ -165,7 +165,7 @@ static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, + + static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) + { +- return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND); ++ return io_get_acct(worker->wq, test_bit(IO_WORKER_F_BOUND, &worker->flags)); + } + + static void io_worker_ref_put(struct io_wq *wq) +@@ -225,7 +225,7 @@ static void io_worker_exit(struct io_worker *worker) + wait_for_completion(&worker->ref_done); + + raw_spin_lock(&wq->lock); +- if (worker->flags & IO_WORKER_F_FREE) ++ if 
(test_bit(IO_WORKER_F_FREE, &worker->flags)) + hlist_nulls_del_rcu(&worker->nulls_node); + list_del_rcu(&worker->all_list); + raw_spin_unlock(&wq->lock); +@@ -410,7 +410,7 @@ static void io_wq_dec_running(struct io_worker *worker) + struct io_wq_acct *acct = io_wq_get_acct(worker); + struct io_wq *wq = worker->wq; + +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; + + if (!atomic_dec_and_test(&acct->nr_running)) +@@ -430,8 +430,8 @@ static void io_wq_dec_running(struct io_worker *worker) + */ + static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) + { +- if (worker->flags & IO_WORKER_F_FREE) { +- worker->flags &= ~IO_WORKER_F_FREE; ++ if (test_bit(IO_WORKER_F_FREE, &worker->flags)) { ++ clear_bit(IO_WORKER_F_FREE, &worker->flags); + raw_spin_lock(&wq->lock); + hlist_nulls_del_init_rcu(&worker->nulls_node); + raw_spin_unlock(&wq->lock); +@@ -444,8 +444,8 @@ static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) + static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) + __must_hold(wq->lock) + { +- if (!(worker->flags & IO_WORKER_F_FREE)) { +- worker->flags |= IO_WORKER_F_FREE; ++ if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) { ++ set_bit(IO_WORKER_F_FREE, &worker->flags); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); + } + } +@@ -634,7 +634,8 @@ static int io_wq_worker(void *data) + bool exit_mask = false, last_timeout = false; + char buf[TASK_COMM_LEN]; + +- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); ++ set_mask_bits(&worker->flags, 0, ++ BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING)); + + snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); + set_task_comm(current, buf); +@@ -698,11 +699,11 @@ void io_wq_worker_running(struct task_struct *tsk) + + if (!worker) + return; +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; +- if (worker->flags & IO_WORKER_F_RUNNING) ++ 
if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) + return; +- worker->flags |= IO_WORKER_F_RUNNING; ++ set_bit(IO_WORKER_F_RUNNING, &worker->flags); + io_wq_inc_running(worker); + } + +@@ -716,12 +717,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk) + + if (!worker) + return; +- if (!(worker->flags & IO_WORKER_F_UP)) ++ if (!test_bit(IO_WORKER_F_UP, &worker->flags)) + return; +- if (!(worker->flags & IO_WORKER_F_RUNNING)) ++ if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags)) + return; + +- worker->flags &= ~IO_WORKER_F_RUNNING; ++ clear_bit(IO_WORKER_F_RUNNING, &worker->flags); + io_wq_dec_running(worker); + } + +@@ -735,7 +736,7 @@ static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker, + raw_spin_lock(&wq->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); + list_add_tail_rcu(&worker->all_list, &wq->all_list); +- worker->flags |= IO_WORKER_F_FREE; ++ set_bit(IO_WORKER_F_FREE, &worker->flags); + raw_spin_unlock(&wq->lock); + wake_up_new_task(tsk); + } +@@ -841,7 +842,7 @@ static bool create_io_worker(struct io_wq *wq, int index) + init_completion(&worker->ref_done); + + if (index == IO_WQ_ACCT_BOUND) +- worker->flags |= IO_WORKER_F_BOUND; ++ set_bit(IO_WORKER_F_BOUND, &worker->flags); + + tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE); + if (!IS_ERR(tsk)) { +@@ -927,8 +928,8 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) + void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) + { + struct io_wq_acct *acct = io_work_get_acct(wq, work); ++ unsigned long work_flags = work->flags; + struct io_cb_cancel_data match; +- unsigned work_flags = work->flags; + bool do_create; + + /* +-- +2.43.0 + diff --git a/queue-6.6/iommu-amd-fix-sysfs-leak-in-iommu-init.patch b/queue-6.6/iommu-amd-fix-sysfs-leak-in-iommu-init.patch new file mode 100644 index 00000000000..7f0ca5a90cf --- /dev/null +++ b/queue-6.6/iommu-amd-fix-sysfs-leak-in-iommu-init.patch @@ -0,0 +1,47 @@ +From 
eca638f0a3735c70cff89e1a0f1b5e6f49011121 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 May 2024 08:42:20 +0800 +Subject: iommu/amd: Fix sysfs leak in iommu init + +From: Kun(llfl) + +[ Upstream commit a295ec52c8624883885396fde7b4df1a179627c3 ] + +During the iommu initialization, iommu_init_pci() adds sysfs nodes. +However, these nodes aren't removed in free_iommu_resources() subsequently. + +Fixes: 39ab9555c241 ("iommu: Add sysfs bindings for struct iommu_device") +Signed-off-by: Kun(llfl) +Reviewed-by: Suravee Suthikulpanit +Link: https://lore.kernel.org/r/c8e0d11c6ab1ee48299c288009cf9c5dae07b42d.1715215003.git.llfl@linux.alibaba.com +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/amd/init.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c +index a2ad2dbd04d92..ef3fae113dd64 100644 +--- a/drivers/iommu/amd/init.c ++++ b/drivers/iommu/amd/init.c +@@ -1692,8 +1692,17 @@ static void __init free_pci_segments(void) + } + } + ++static void __init free_sysfs(struct amd_iommu *iommu) ++{ ++ if (iommu->iommu.dev) { ++ iommu_device_unregister(&iommu->iommu); ++ iommu_device_sysfs_remove(&iommu->iommu); ++ } ++} ++ + static void __init free_iommu_one(struct amd_iommu *iommu) + { ++ free_sysfs(iommu); + free_cwwb_sem(iommu); + free_command_buffer(iommu); + free_event_buffer(iommu); +-- +2.43.0 + diff --git a/queue-6.6/iommu-return-right-value-in-iommu_sva_bind_device.patch b/queue-6.6/iommu-return-right-value-in-iommu_sva_bind_device.patch new file mode 100644 index 00000000000..4512743a159 --- /dev/null +++ b/queue-6.6/iommu-return-right-value-in-iommu_sva_bind_device.patch @@ -0,0 +1,49 @@ +From b5f5cc89156b769e53cd6e76f87836fbf8207d07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 12:25:28 +0800 +Subject: iommu: Return right value in iommu_sva_bind_device() + +From: Lu Baolu + +[ Upstream commit 89e8a2366e3bce584b6c01549d5019c5cda1205e ] + 
+iommu_sva_bind_device() should return either a sva bond handle or an +ERR_PTR value in error cases. Existing drivers (idxd and uacce) only +check the return value with IS_ERR(). This could potentially lead to +a kernel NULL pointer dereference issue if the function returns NULL +instead of an error pointer. + +In reality, this doesn't cause any problems because iommu_sva_bind_device() +only returns NULL when the kernel is not configured with CONFIG_IOMMU_SVA. +In this case, iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) will +return an error, and the device drivers won't call iommu_sva_bind_device() +at all. + +Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices") +Signed-off-by: Lu Baolu +Reviewed-by: Jean-Philippe Brucker +Reviewed-by: Kevin Tian +Reviewed-by: Vasant Hegde +Link: https://lore.kernel.org/r/20240528042528.71396-1-baolu.lu@linux.intel.com +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + include/linux/iommu.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/iommu.h b/include/linux/iommu.h +index 0225cf7445de2..b6ef263e85c06 100644 +--- a/include/linux/iommu.h ++++ b/include/linux/iommu.h +@@ -1199,7 +1199,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle); + static inline struct iommu_sva * + iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) + { +- return NULL; ++ return ERR_PTR(-ENODEV); + } + + static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +-- +2.43.0 + diff --git a/queue-6.6/ionic-fix-use-after-netif_napi_del.patch b/queue-6.6/ionic-fix-use-after-netif_napi_del.patch new file mode 100644 index 00000000000..b348521d2cf --- /dev/null +++ b/queue-6.6/ionic-fix-use-after-netif_napi_del.patch @@ -0,0 +1,97 @@ +From e4a1499962605a73c30a134999bc1e388086a815 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jun 2024 06:04:46 +0000 +Subject: ionic: fix use after netif_napi_del() + +From: Taehee Yoo + +[ Upstream commit 
79f18a41dd056115d685f3b0a419c7cd40055e13 ] + +When queues are started, netif_napi_add() and napi_enable() are called. +If there are 4 queues and only 3 queues are used for the current +configuration, only 3 queues' napi should be registered and enabled. +The ionic_qcq_enable() checks whether the .poll pointer is not NULL for +enabling only the using queue' napi. Unused queues' napi will not be +registered by netif_napi_add(), so the .poll pointer indicates NULL. +But it couldn't distinguish whether the napi was unregistered or not +because netif_napi_del() doesn't reset the .poll pointer to NULL. +So, ionic_qcq_enable() calls napi_enable() for the queue, which was +unregistered by netif_napi_del(). + +Reproducer: + ethtool -L rx 1 tx 1 combined 0 + ethtool -L rx 0 tx 0 combined 1 + ethtool -L rx 0 tx 0 combined 4 + +Splat looks like: +kernel BUG at net/core/dev.c:6666! +Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI +CPU: 3 PID: 1057 Comm: kworker/3:3 Not tainted 6.10.0-rc2+ #16 +Workqueue: events ionic_lif_deferred_work [ionic] +RIP: 0010:napi_enable+0x3b/0x40 +Code: 48 89 c2 48 83 e2 f6 80 b9 61 09 00 00 00 74 0d 48 83 bf 60 01 00 00 00 74 03 80 ce 01 f0 4f +RSP: 0018:ffffb6ed83227d48 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffff97560cda0828 RCX: 0000000000000029 +RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff97560cda0a28 +RBP: ffffb6ed83227d50 R08: 0000000000000400 R09: 0000000000000001 +R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 +R13: ffff97560ce3c1a0 R14: 0000000000000000 R15: ffff975613ba0a20 +FS: 0000000000000000(0000) GS:ffff975d5f780000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f8f734ee200 CR3: 0000000103e50000 CR4: 00000000007506f0 +PKRU: 55555554 +Call Trace: + + ? die+0x33/0x90 + ? do_trap+0xd9/0x100 + ? napi_enable+0x3b/0x40 + ? do_error_trap+0x83/0xb0 + ? napi_enable+0x3b/0x40 + ? napi_enable+0x3b/0x40 + ? exc_invalid_op+0x4e/0x70 + ? napi_enable+0x3b/0x40 + ? 
asm_exc_invalid_op+0x16/0x20 + ? napi_enable+0x3b/0x40 + ionic_qcq_enable+0xb7/0x180 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] + ionic_start_queues+0xc4/0x290 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] + ionic_link_status_check+0x11c/0x170 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] + ionic_lif_deferred_work+0x129/0x280 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] + process_one_work+0x145/0x360 + worker_thread+0x2bb/0x3d0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0xcc/0x100 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x2d/0x50 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + +Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") +Signed-off-by: Taehee Yoo +Reviewed-by: Brett Creeley +Reviewed-by: Shannon Nelson +Link: https://lore.kernel.org/r/20240612060446.1754392-1-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index 4f05cddc65cb4..7e6e1bed525af 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -296,10 +296,8 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) + if (ret) + return ret; + +- if (qcq->napi.poll) +- napi_enable(&qcq->napi); +- + if (qcq->flags & IONIC_QCQ_F_INTR) { ++ napi_enable(&qcq->napi); + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, +-- +2.43.0 + diff --git a/queue-6.6/liquidio-adjust-a-null-pointer-handling-path-in-lio_.patch b/queue-6.6/liquidio-adjust-a-null-pointer-handling-path-in-lio_.patch new file mode 100644 index 00000000000..29b5032d120 --- /dev/null +++ b/queue-6.6/liquidio-adjust-a-null-pointer-handling-path-in-lio_.patch @@ -0,0 +1,69 @@ +From fdf1f66ef8c2f91bc23edad2a841779f24c8ce03 Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 13:11:35 +0300 +Subject: liquidio: Adjust a NULL pointer handling path in + lio_vf_rep_copy_packet + +From: Aleksandr Mishin + +[ Upstream commit c44711b78608c98a3e6b49ce91678cd0917d5349 ] + +In lio_vf_rep_copy_packet() pg_info->page is compared to a NULL value, +but then it is unconditionally passed to skb_add_rx_frag() which looks +strange and could lead to null pointer dereference. + +lio_vf_rep_copy_packet() call trace looks like: + octeon_droq_process_packets + octeon_droq_fast_process_packets + octeon_droq_dispatch_pkt + octeon_create_recv_info + ...search in the dispatch_list... + ->disp_fn(rdisp->rinfo, ...) + lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, ...) +In this path there is no code which sets pg_info->page to NULL. +So this check looks unneeded and doesn't solve potential problem. +But I guess the author had reason to add a check and I have no such card +and can't do real test. +In addition, the code in the function liquidio_push_packet() in +liquidio/lio_core.c does exactly the same. + +Based on this, I consider the most acceptable compromise solution to +adjust this issue by moving skb_add_rx_frag() into conditional scope. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: 1f233f327913 ("liquidio: switchdev support for LiquidIO NIC") +Signed-off-by: Aleksandr Mishin +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +index 600de587d7a98..e70b9ccca380e 100644 +--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c ++++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +@@ -272,13 +272,12 @@ lio_vf_rep_copy_packet(struct octeon_device *oct, + pg_info->page_offset; + memcpy(skb->data, va, MIN_SKB_SIZE); + skb_put(skb, MIN_SKB_SIZE); ++ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, ++ pg_info->page, ++ pg_info->page_offset + MIN_SKB_SIZE, ++ len - MIN_SKB_SIZE, ++ LIO_RXBUFFER_SZ); + } +- +- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, +- pg_info->page, +- pg_info->page_offset + MIN_SKB_SIZE, +- len - MIN_SKB_SIZE, +- LIO_RXBUFFER_SZ); + } else { + struct octeon_skb_page_info *pg_info = + ((struct octeon_skb_page_info *)(skb->cb)); +-- +2.43.0 + diff --git a/queue-6.6/modpost-do-not-warn-about-missing-module_description.patch b/queue-6.6/modpost-do-not-warn-about-missing-module_description.patch new file mode 100644 index 00000000000..a71290099dc --- /dev/null +++ b/queue-6.6/modpost-do-not-warn-about-missing-module_description.patch @@ -0,0 +1,44 @@ +From a918c813fef3d9d046174a64508226334be7b3c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jun 2024 03:36:12 +0900 +Subject: modpost: do not warn about missing MODULE_DESCRIPTION() for vmlinux.o + +From: Masahiro Yamada + +[ Upstream commit 9185afeac2a3dcce8300a5684291a43c2838cfd6 ] + +Building with W=1 incorrectly emits the following warning: + + WARNING: modpost: missing MODULE_DESCRIPTION() in vmlinux.o + +This check should apply only to modules. 
+ +Fixes: 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing") +Signed-off-by: Masahiro Yamada +Reviewed-by: Vincenzo Palazzo +Signed-off-by: Sasha Levin +--- + scripts/mod/modpost.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 269bd79bcd9ad..828d5cc367169 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -1684,10 +1684,11 @@ static void read_symbols(const char *modname) + namespace = get_next_modinfo(&info, "import_ns", + namespace); + } ++ ++ if (extra_warn && !get_modinfo(&info, "description")) ++ warn("missing MODULE_DESCRIPTION() in %s\n", modname); + } + +- if (extra_warn && !get_modinfo(&info, "description")) +- warn("missing MODULE_DESCRIPTION() in %s\n", modname); + for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { + symname = remove_dot(info.strtab + sym->st_name); + +-- +2.43.0 + diff --git a/queue-6.6/net-bridge-mst-fix-suspicious-rcu-usage-in-br_mst_se.patch b/queue-6.6/net-bridge-mst-fix-suspicious-rcu-usage-in-br_mst_se.patch new file mode 100644 index 00000000000..eac6cfe4327 --- /dev/null +++ b/queue-6.6/net-bridge-mst-fix-suspicious-rcu-usage-in-br_mst_se.patch @@ -0,0 +1,40 @@ +From 1555da7e0187c0a80238768820bb5032d1ae9c73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Jun 2024 13:36:54 +0300 +Subject: net: bridge: mst: fix suspicious rcu usage in br_mst_set_state + +From: Nikolay Aleksandrov + +[ Upstream commit 546ceb1dfdac866648ec959cbc71d9525bd73462 ] + +I converted br_mst_set_state to RCU to avoid a vlan use-after-free +but forgot to change the vlan group dereference helper. Switch to vlan +group RCU deref helper to fix the suspicious rcu usage warning. 
+ +Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") +Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe +Signed-off-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20240609103654.914987-3-razor@blackwall.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_mst.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c +index 1de72816b0fb2..1820f09ff59ce 100644 +--- a/net/bridge/br_mst.c ++++ b/net/bridge/br_mst.c +@@ -102,7 +102,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + int err = 0; + + rcu_read_lock(); +- vg = nbp_vlan_group(p); ++ vg = nbp_vlan_group_rcu(p); + if (!vg) + goto out; + +-- +2.43.0 + diff --git a/queue-6.6/net-bridge-mst-pass-vlan-group-directly-to-br_mst_vl.patch b/queue-6.6/net-bridge-mst-pass-vlan-group-directly-to-br_mst_vl.patch new file mode 100644 index 00000000000..98235143ab1 --- /dev/null +++ b/queue-6.6/net-bridge-mst-pass-vlan-group-directly-to-br_mst_vl.patch @@ -0,0 +1,72 @@ +From 6a8759389672ae450cb46a53fc30488e42b5c582 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Jun 2024 13:36:53 +0300 +Subject: net: bridge: mst: pass vlan group directly to br_mst_vlan_set_state + +From: Nikolay Aleksandrov + +[ Upstream commit 36c92936e868601fa1f43da6758cf55805043509 ] + +Pass the already obtained vlan group pointer to br_mst_vlan_set_state() +instead of dereferencing it again. Each caller has already correctly +dereferenced it for their context. This change is required for the +following suspicious RCU dereference fix. No functional changes +intended. 
+ +Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") +Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe +Signed-off-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20240609103654.914987-2-razor@blackwall.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_mst.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c +index 3c66141d34d62..1de72816b0fb2 100644 +--- a/net/bridge/br_mst.c ++++ b/net/bridge/br_mst.c +@@ -73,11 +73,10 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) + } + EXPORT_SYMBOL_GPL(br_mst_get_state); + +-static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, ++static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, ++ struct net_bridge_vlan *v, + u8 state) + { +- struct net_bridge_vlan_group *vg = nbp_vlan_group(p); +- + if (br_vlan_get_state(v) == state) + return; + +@@ -121,7 +120,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + if (v->brvlan->msti != msti) + continue; + +- br_mst_vlan_set_state(p, v, state); ++ br_mst_vlan_set_state(vg, v, state); + } + + out: +@@ -140,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) + * it. + */ + if (v != pv && v->brvlan->msti == msti) { +- br_mst_vlan_set_state(pv->port, pv, v->state); ++ br_mst_vlan_set_state(vg, pv, v->state); + return; + } + } + + /* Otherwise, start out in a new MSTI with all ports disabled. 
*/ +- return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); ++ return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); + } + + int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) +-- +2.43.0 + diff --git a/queue-6.6/net-change-proto-and-proto_ops-accept-type.patch b/queue-6.6/net-change-proto-and-proto_ops-accept-type.patch new file mode 100644 index 00000000000..fb9245f0e17 --- /dev/null +++ b/queue-6.6/net-change-proto-and-proto_ops-accept-type.patch @@ -0,0 +1,1026 @@ +From 5548e1d39c40fb6c0c0cb4428fa05c9cdff467cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 May 2024 09:20:08 -0600 +Subject: net: change proto and proto_ops accept type + +From: Jens Axboe + +[ Upstream commit 92ef0fd55ac80dfc2e4654edfe5d1ddfa6e070fe ] + +Rather than pass in flags, error pointer, and whether this is a kernel +invocation or not, add a struct proto_accept_arg struct as the argument. +This then holds all of these arguments, and prepares accept for being +able to pass back more information. + +No functional changes in this patch. 
+ +Acked-by: Jakub Kicinski +Signed-off-by: Jens Axboe +Stable-dep-of: 1b536948e805 ("af_unix: Annotate data-race of sk->sk_state in unix_accept().") +Signed-off-by: Sasha Levin +--- + crypto/af_alg.c | 11 ++++++----- + crypto/algif_hash.c | 10 +++++----- + drivers/xen/pvcalls-back.c | 6 +++++- + fs/ocfs2/cluster/tcp.c | 5 ++++- + include/crypto/if_alg.h | 3 ++- + include/linux/net.h | 4 +++- + include/net/inet_common.h | 4 ++-- + include/net/inet_connection_sock.h | 2 +- + include/net/sock.h | 12 +++++++++--- + net/atm/svc.c | 8 ++++---- + net/ax25/af_ax25.c | 6 +++--- + net/bluetooth/iso.c | 4 ++-- + net/bluetooth/l2cap_sock.c | 4 ++-- + net/bluetooth/rfcomm/sock.c | 6 +++--- + net/bluetooth/sco.c | 4 ++-- + net/core/sock.c | 4 ++-- + net/ipv4/af_inet.c | 10 +++++----- + net/ipv4/inet_connection_sock.c | 6 +++--- + net/iucv/af_iucv.c | 4 ++-- + net/llc/af_llc.c | 7 +++---- + net/mptcp/protocol.c | 11 +++++------ + net/netrom/af_netrom.c | 6 +++--- + net/nfc/llcp_sock.c | 4 ++-- + net/phonet/pep.c | 12 ++++++------ + net/phonet/socket.c | 7 +++---- + net/rds/tcp_listen.c | 6 +++++- + net/rose/af_rose.c | 6 +++--- + net/sctp/socket.c | 8 ++++---- + net/smc/af_smc.c | 6 +++--- + net/socket.c | 13 ++++++++++--- + net/tipc/socket.c | 13 +++++-------- + net/unix/af_unix.c | 21 ++++++++++----------- + net/vmw_vsock/af_vsock.c | 6 +++--- + net/x25/af_x25.c | 4 ++-- + 34 files changed, 132 insertions(+), 111 deletions(-) + +diff --git a/crypto/af_alg.c b/crypto/af_alg.c +index 68cc9290cabe9..598bf46691706 100644 +--- a/crypto/af_alg.c ++++ b/crypto/af_alg.c +@@ -407,7 +407,8 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, + return err; + } + +-int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) ++int af_alg_accept(struct sock *sk, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type; +@@ -422,7 +423,7 @@ int af_alg_accept(struct sock *sk, struct socket 
*newsock, bool kern) + if (!type) + goto unlock; + +- sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); ++ sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, arg->kern); + err = -ENOMEM; + if (!sk2) + goto unlock; +@@ -468,10 +469,10 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) + } + EXPORT_SYMBOL_GPL(af_alg_accept); + +-static int alg_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int alg_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { +- return af_alg_accept(sock->sk, newsock, kern); ++ return af_alg_accept(sock->sk, newsock, arg); + } + + static const struct proto_ops alg_proto_ops = { +diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c +index e24c829d7a015..7c7394d46a235 100644 +--- a/crypto/algif_hash.c ++++ b/crypto/algif_hash.c +@@ -223,8 +223,8 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + return err ?: len; + } + +-static int hash_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int hash_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); +@@ -252,7 +252,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags, + if (err) + goto out_free_state; + +- err = af_alg_accept(ask->parent, newsock, kern); ++ err = af_alg_accept(ask->parent, newsock, arg); + if (err) + goto out_free_state; + +@@ -355,7 +355,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + } + + static int hash_accept_nokey(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + int err; + +@@ -363,7 +363,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock, + if (err) + return err; + +- return hash_accept(sock, newsock, flags, kern); ++ return 
hash_accept(sock, newsock, arg); + } + + static struct proto_ops algif_hash_ops_nokey = { +diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c +index d52593466a792..fd7ed65e0197d 100644 +--- a/drivers/xen/pvcalls-back.c ++++ b/drivers/xen/pvcalls-back.c +@@ -517,6 +517,10 @@ static void __pvcalls_back_accept(struct work_struct *work) + { + struct sockpass_mapping *mappass = container_of( + work, struct sockpass_mapping, register_work); ++ struct proto_accept_arg arg = { ++ .flags = O_NONBLOCK, ++ .kern = true, ++ }; + struct sock_mapping *map; + struct pvcalls_ioworker *iow; + struct pvcalls_fedata *fedata; +@@ -548,7 +552,7 @@ static void __pvcalls_back_accept(struct work_struct *work) + sock->type = mappass->sock->type; + sock->ops = mappass->sock->ops; + +- ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true); ++ ret = inet_accept(mappass->sock, sock, &arg); + if (ret == -EAGAIN) { + sock_release(sock); + return; +diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c +index 960080753d3bd..2b8fa3e782fb6 100644 +--- a/fs/ocfs2/cluster/tcp.c ++++ b/fs/ocfs2/cluster/tcp.c +@@ -1784,6 +1784,9 @@ static int o2net_accept_one(struct socket *sock, int *more) + struct o2nm_node *node = NULL; + struct o2nm_node *local_node = NULL; + struct o2net_sock_container *sc = NULL; ++ struct proto_accept_arg arg = { ++ .flags = O_NONBLOCK, ++ }; + struct o2net_node *nn; + unsigned int nofs_flag; + +@@ -1802,7 +1805,7 @@ static int o2net_accept_one(struct socket *sock, int *more) + + new_sock->type = sock->type; + new_sock->ops = sock->ops; +- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); ++ ret = sock->ops->accept(sock, new_sock, &arg); + if (ret < 0) + goto out; + +diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h +index 08b803a4fcde4..93f868afe6935 100644 +--- a/include/crypto/if_alg.h ++++ b/include/crypto/if_alg.h +@@ -164,7 +164,8 @@ int af_alg_unregister_type(const struct af_alg_type *type); + + int af_alg_release(struct 
socket *sock); + void af_alg_release_parent(struct sock *sk); +-int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); ++int af_alg_accept(struct sock *sk, struct socket *newsock, ++ struct proto_accept_arg *arg); + + void af_alg_free_sg(struct af_alg_sgl *sgl); + +diff --git a/include/linux/net.h b/include/linux/net.h +index c9b4a63791a45..a764a0f6cc39a 100644 +--- a/include/linux/net.h ++++ b/include/linux/net.h +@@ -153,6 +153,7 @@ struct sockaddr; + struct msghdr; + struct module; + struct sk_buff; ++struct proto_accept_arg; + typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); + typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); +@@ -171,7 +172,8 @@ struct proto_ops { + int (*socketpair)(struct socket *sock1, + struct socket *sock2); + int (*accept) (struct socket *sock, +- struct socket *newsock, int flags, bool kern); ++ struct socket *newsock, ++ struct proto_accept_arg *arg); + int (*getname) (struct socket *sock, + struct sockaddr *addr, + int peer); +diff --git a/include/net/inet_common.h b/include/net/inet_common.h +index f50a644d87a98..c17a6585d0b0b 100644 +--- a/include/net/inet_common.h ++++ b/include/net/inet_common.h +@@ -29,8 +29,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags, int is_sendmsg); + int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); +-int inet_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern); ++int inet_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg); + void __inet_accept(struct socket *sock, struct socket *newsock, + struct sock *newsk); + int inet_send_prepare(struct sock *sk); +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index 6ecac01115d9c..2d9ecea74f5c1 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -251,7 
+251,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, + return (unsigned long)min_t(u64, when, max_when); + } + +-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); ++struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg); + + int inet_csk_get_port(struct sock *sk, unsigned short snum); + +diff --git a/include/net/sock.h b/include/net/sock.h +index 5942b5ff4c786..88f69d39a63f9 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1244,6 +1244,12 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) + size - offsetof(struct sock, sk_node.pprev)); + } + ++struct proto_accept_arg { ++ int flags; ++ int err; ++ bool kern; ++}; ++ + /* Networking protocol blocks we attach to sockets. + * socket layer -> transport layer interface + */ +@@ -1258,8 +1264,8 @@ struct proto { + int addr_len); + int (*disconnect)(struct sock *sk, int flags); + +- struct sock * (*accept)(struct sock *sk, int flags, int *err, +- bool kern); ++ struct sock * (*accept)(struct sock *sk, ++ struct proto_accept_arg *arg); + + int (*ioctl)(struct sock *sk, int cmd, + int *karg); +@@ -1931,7 +1937,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, + int sock_no_bind(struct socket *, struct sockaddr *, int); + int sock_no_connect(struct socket *, struct sockaddr *, int, int); + int sock_no_socketpair(struct socket *, struct socket *); +-int sock_no_accept(struct socket *, struct socket *, int, bool); ++int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *); + int sock_no_getname(struct socket *, struct sockaddr *, int); + int sock_no_ioctl(struct socket *, unsigned int, unsigned long); + int sock_no_listen(struct socket *, int); +diff --git a/net/atm/svc.c b/net/atm/svc.c +index 36a814f1fbd16..f8137ae693b08 100644 +--- a/net/atm/svc.c ++++ b/net/atm/svc.c +@@ -324,8 +324,8 @@ static int svc_listen(struct socket *sock, int backlog) + return error; + } + +-static int 
svc_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int svc_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk; + struct sk_buff *skb; +@@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, + + lock_sock(sk); + +- error = svc_create(sock_net(sk), newsock, 0, kern); ++ error = svc_create(sock_net(sk), newsock, 0, arg->kern); + if (error) + goto out; + +@@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, + error = -sk->sk_err; + break; + } +- if (flags & O_NONBLOCK) { ++ if (arg->flags & O_NONBLOCK) { + error = -EAGAIN; + break; + } +diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c +index 26a3095bec462..97973c65bd633 100644 +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1373,8 +1373,8 @@ static int __must_check ax25_connect(struct socket *sock, + return err; + } + +-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int ax25_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sk_buff *skb; + struct sock *newsk; +@@ -1411,7 +1411,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + if (skb) + break; + +- if (flags & O_NONBLOCK) { ++ if (arg->flags & O_NONBLOCK) { + err = -EWOULDBLOCK; + break; + } +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index 05b9edb480f09..41e4ee15e7438 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -1036,7 +1036,7 @@ static int iso_sock_listen(struct socket *sock, int backlog) + } + + static int iso_sock_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk, *ch; +@@ -1045,7 +1045,7 @@ static int iso_sock_accept(struct socket *sock, struct socket 
*newsock, + + lock_sock(sk); + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + +diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c +index 97d0a0f5829a6..0f1c1d7efbc14 100644 +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -326,7 +326,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) + } + + static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk, *nsk; +@@ -335,7 +335,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, + + lock_sock_nested(sk, L2CAP_NESTING_PARENT); + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + +diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c +index b54e8a530f55a..515b33796db18 100644 +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -468,8 +468,8 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) + return err; + } + +-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk, *nsk; +@@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f + goto done; + } + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index 3c3650902c839..32f391b6e3a24 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -646,7 +646,7 @@ 
static int sco_sock_listen(struct socket *sock, int backlog) + } + + static int sco_sock_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk, *ch; +@@ -655,7 +655,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, + + lock_sock(sk); + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + +diff --git a/net/core/sock.c b/net/core/sock.c +index 7f64a7b95cfb2..b69b2cbceb177 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -3222,8 +3222,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) + } + EXPORT_SYMBOL(sock_no_socketpair); + +-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++int sock_no_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + return -EOPNOTSUPP; + } +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 3feff7f738a48..98c40996096c1 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -772,16 +772,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new + * Accept a pending connection. The TCP layer now gives BSD semantics. + */ + +-int inet_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++int inet_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk1 = sock->sk, *sk2; +- int err = -EINVAL; + + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ +- sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); ++ arg->err = -EINVAL; ++ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg); + if (!sk2) +- return err; ++ return arg->err; + + lock_sock(sk2); + __inet_accept(sock, newsock, sk2); +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index a018981b45142..974996a35d01b 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -657,7 +657,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) + /* + * This will accept the next outstanding connection. + */ +-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) ++struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) + { + struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; +@@ -676,7 +676,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) + + /* Find already established connection */ + if (reqsk_queue_empty(queue)) { +- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + /* If this is a non blocking socket don't sleep */ + error = -EAGAIN; +@@ -741,7 +741,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) + out_err: + newsk = NULL; + req = NULL; +- *err = error; ++ arg->err = error; + goto out; + } + EXPORT_SYMBOL(inet_csk_accept); +diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c +index 498a0c35b7bb2..f6d7659da156a 100644 +--- a/net/iucv/af_iucv.c ++++ b/net/iucv/af_iucv.c +@@ -795,7 +795,7 @@ static int iucv_sock_listen(struct socket *sock, int backlog) + + /* Accept a pending connection */ + static int iucv_sock_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; +@@ -809,7 +809,7 @@ static int 
iucv_sock_accept(struct socket *sock, struct socket *newsock, + goto done; + } + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + /* Wait for an incoming connection */ + add_wait_queue_exclusive(sk_sleep(sk), &wait); +diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c +index fde1140d899ef..4eb52add7103b 100644 +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) + * llc_ui_accept - accept a new incoming connection. + * @sock: Socket which connections arrive on. + * @newsock: Socket to move incoming connection to. +- * @flags: User specified operational flags. +- * @kern: If the socket is kernel internal ++ * @arg: User specified arguments + * + * Accept a new incoming connection. + * Returns 0 upon success, negative otherwise. + */ +-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int llc_ui_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk, *newsk; + struct llc_sock *llc, *newllc; +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 4ace52e4211ad..1f0c8cdc8dd55 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3852,11 +3852,10 @@ static int mptcp_listen(struct socket *sock, int backlog) + } + + static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + struct mptcp_sock *msk = mptcp_sk(sock->sk); + struct sock *ssk, *newsk; +- int err; + + pr_debug("msk=%p", msk); + +@@ -3868,9 +3867,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + return -EINVAL; + + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); +- newsk = inet_csk_accept(ssk, flags, &err, kern); ++ newsk = inet_csk_accept(ssk, arg); + if (!newsk) +- return err; ++ return arg->err; + + 
pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + if (sk_is_mptcp(newsk)) { +@@ -3891,7 +3890,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + newsk = new_mptcp_sock; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + +- newsk->sk_kern_sock = kern; ++ newsk->sk_kern_sock = arg->kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); + +@@ -3920,7 +3919,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + } + } else { + tcpfallback: +- newsk->sk_kern_sock = kern; ++ newsk->sk_kern_sock = arg->kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); + /* we are being invoked after accepting a non-mp-capable +diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c +index f26dee48e03af..1a1713f3aaf82 100644 +--- a/net/netrom/af_netrom.c ++++ b/net/netrom/af_netrom.c +@@ -772,8 +772,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, + return err; + } + +-static int nr_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int nr_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sk_buff *skb; + struct sock *newsk; +@@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags, + if (skb) + break; + +- if (flags & O_NONBLOCK) { ++ if (arg->flags & O_NONBLOCK) { + err = -EWOULDBLOCK; + break; + } +diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c +index 819157bbb5a2c..ff1768e4b7bb3 100644 +--- a/net/nfc/llcp_sock.c ++++ b/net/nfc/llcp_sock.c +@@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, + } + + static int llcp_sock_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *new_sk; +@@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket 
*newsock, + goto error; + } + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + /* Wait for an incoming connection. */ + add_wait_queue_exclusive(sk_sleep(sk), &wait); +diff --git a/net/phonet/pep.c b/net/phonet/pep.c +index 3dd5f52bc1b58..53a858478e22f 100644 +--- a/net/phonet/pep.c ++++ b/net/phonet/pep.c +@@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout) + sock_put(sk); + } + +-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, +- bool kern) ++static struct sock *pep_sock_accept(struct sock *sk, ++ struct proto_accept_arg *arg) + { + struct pep_sock *pn = pep_sk(sk), *newpn; + struct sock *newsk = NULL; +@@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; + +- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, +- errp); ++ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0, ++ &arg->err); + if (!skb) + return NULL; + +@@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, +- kern); ++ arg->kern); + if (!newsk) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); + err = -ENOBUFS; +@@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + drop: + release_sock(sk); + kfree_skb(skb); +- *errp = err; ++ arg->err = err; + return newsk; + } + +diff --git a/net/phonet/socket.c b/net/phonet/socket.c +index 1018340d89a7d..5ce0b3ee5def8 100644 +--- a/net/phonet/socket.c ++++ b/net/phonet/socket.c +@@ -292,18 +292,17 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, + } + + static int pn_socket_accept(struct socket *sock, struct socket *newsock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk; + struct sock *newsk; +- int err; + + if (unlikely(sk->sk_state != TCP_LISTEN)) + return -EINVAL; + +- newsk = sk->sk_prot->accept(sk, flags, &err, kern); ++ newsk = sk->sk_prot->accept(sk, arg); + if (!newsk) +- return err; ++ return arg->err; + + lock_sock(newsk); + sock_graft(newsk, newsock); +diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c +index 53b3535a1e4a8..eb0b5373d7dab 100644 +--- a/net/rds/tcp_listen.c ++++ b/net/rds/tcp_listen.c +@@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock) + int conn_state; + struct rds_conn_path *cp; + struct in6_addr *my_addr, *peer_addr; ++ struct proto_accept_arg arg = { ++ .flags = O_NONBLOCK, ++ .kern = true, ++ }; + #if !IS_ENABLED(CONFIG_IPV6) + struct in6_addr saddr, daddr; + #endif +@@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock) + if (ret) + goto out; + +- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); ++ ret = sock->ops->accept(sock, new_sock, &arg); + if (ret < 0) + goto 
out; + +diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c +index 42e8b9e37516b..cc7afa4198f03 100644 +--- a/net/rose/af_rose.c ++++ b/net/rose/af_rose.c +@@ -919,8 +919,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le + return err; + } + +-static int rose_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int rose_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sk_buff *skb; + struct sock *newsk; +@@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags, + if (skb) + break; + +- if (flags & O_NONBLOCK) { ++ if (arg->flags & O_NONBLOCK) { + err = -EWOULDBLOCK; + break; + } +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index 6b9fcdb0952a0..c6963210c7ee1 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4846,7 +4846,7 @@ static int sctp_disconnect(struct sock *sk, int flags) + * descriptor will be returned from accept() to represent the newly + * formed association. 
+ */ +-static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) ++static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) + { + struct sctp_sock *sp; + struct sctp_endpoint *ep; +@@ -4870,7 +4870,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) + goto out; + } + +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + + error = sctp_wait_for_accept(sk, timeo); + if (error) +@@ -4881,7 +4881,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) + */ + asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); + +- newsk = sp->pf->create_accept_sk(sk, asoc, kern); ++ newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern); + if (!newsk) { + error = -ENOMEM; + goto out; +@@ -4898,7 +4898,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) + + out: + release_sock(sk); +- *err = error; ++ arg->err = error; + return newsk; + } + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 3158b94fd347a..b9bd340814e97 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -2633,7 +2633,7 @@ static int smc_listen(struct socket *sock, int backlog) + } + + static int smc_accept(struct socket *sock, struct socket *new_sock, +- int flags, bool kern) ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk, *nsk; + DECLARE_WAITQUEUE(wait, current); +@@ -2652,7 +2652,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, + } + + /* Wait for an incoming connection */ +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (!(nsk = smc_accept_dequeue(sk, new_sock))) { + set_current_state(TASK_INTERRUPTIBLE); +@@ -2679,7 +2679,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, + if (rc) + goto out; + +- if 
(lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { ++ if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { + /* wait till data arrives on the socket */ + timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * + MSEC_PER_SEC); +diff --git a/net/socket.c b/net/socket.c +index 8d83c4bb163b4..5ff2107186001 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -1902,6 +1902,9 @@ struct file *do_accept(struct file *file, unsigned file_flags, + struct file *newfile; + int err, len; + struct sockaddr_storage address; ++ struct proto_accept_arg arg = { ++ .flags = file_flags, ++ }; + const struct proto_ops *ops; + + sock = sock_from_file(file); +@@ -1930,8 +1933,8 @@ struct file *do_accept(struct file *file, unsigned file_flags, + if (err) + goto out_fd; + +- err = ops->accept(sock, newsock, sock->file->f_flags | file_flags, +- false); ++ arg.flags |= sock->file->f_flags; ++ err = ops->accept(sock, newsock, &arg); + if (err < 0) + goto out_fd; + +@@ -3556,6 +3559,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) + { + struct sock *sk = sock->sk; + const struct proto_ops *ops = READ_ONCE(sock->ops); ++ struct proto_accept_arg arg = { ++ .flags = flags, ++ .kern = true, ++ }; + int err; + + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, +@@ -3563,7 +3570,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) + if (err < 0) + goto done; + +- err = ops->accept(sock, *newsock, flags, true); ++ err = ops->accept(sock, *newsock, &arg); + if (err < 0) { + sock_release(*newsock); + *newsock = NULL; +diff --git a/net/tipc/socket.c b/net/tipc/socket.c +index bb1118d02f953..eb996dd3d00f0 100644 +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -147,8 +147,6 @@ static void tipc_data_ready(struct sock *sk); + static void tipc_write_space(struct sock *sk); + static void tipc_sock_destruct(struct sock *sk); + static int tipc_release(struct socket *sock); +-static int tipc_accept(struct socket 
*sock, struct socket *new_sock, int flags, +- bool kern); + static void tipc_sk_timeout(struct timer_list *t); + static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); + static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); +@@ -2712,13 +2710,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) + * tipc_accept - wait for connection request + * @sock: listening socket + * @new_sock: new socket that is to be connected +- * @flags: file-related flags associated with socket +- * @kern: caused by kernel or by userspace? ++ * @arg: arguments for accept + * + * Return: 0 on success, errno otherwise + */ +-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, +- bool kern) ++static int tipc_accept(struct socket *sock, struct socket *new_sock, ++ struct proto_accept_arg *arg) + { + struct sock *new_sk, *sk = sock->sk; + struct tipc_sock *new_tsock; +@@ -2734,14 +2731,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + res = -EINVAL; + goto exit; + } +- timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); ++ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; + + buf = skb_peek(&sk->sk_receive_queue); + +- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); ++ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); + if (res) + goto exit; + security_sk_clone(sock->sk, new_sock->sk); +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 9d48eef5d62e3..ab57fa0595e21 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -750,7 +750,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); + static int unix_stream_connect(struct socket *, struct sockaddr *, + int addr_len, int flags); + static int unix_socketpair(struct socket *, struct socket *); +-static int unix_accept(struct socket *, struct socket *, int, bool); ++static int unix_accept(struct socket 
*, struct socket *, struct proto_accept_arg *arg); + static int unix_getname(struct socket *, struct sockaddr *, int); + static __poll_t unix_poll(struct file *, struct socket *, poll_table *); + static __poll_t unix_dgram_poll(struct file *, struct socket *, +@@ -1699,19 +1699,18 @@ static void unix_sock_inherit_flags(const struct socket *old, + set_bit(SOCK_PASSSEC, &new->flags); + } + +-static int unix_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int unix_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct sock *tsk; +- int err; + +- err = -EOPNOTSUPP; ++ arg->err = -EOPNOTSUPP; + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) + goto out; + +- err = -EINVAL; ++ arg->err = -EINVAL; + if (sk->sk_state != TCP_LISTEN) + goto out; + +@@ -1719,12 +1718,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + * so that no locks are necessary. + */ + +- skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, +- &err); ++ skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, ++ &arg->err); + if (!skb) { + /* This means receive shutdown. 
*/ +- if (err == 0) +- err = -EINVAL; ++ if (arg->err == 0) ++ arg->err = -EINVAL; + goto out; + } + +@@ -1742,7 +1741,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + return 0; + + out: +- return err; ++ return arg->err; + } + + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index 4afb6a541cf38..22cc19e797914 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1489,8 +1489,8 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, + return err; + } + +-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int vsock_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *listener; + int err; +@@ -1517,7 +1517,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. 
+ */ +- timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK); ++ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && +diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c +index f15a4493eb0bf..b2074b0cd6129 100644 +--- a/net/x25/af_x25.c ++++ b/net/x25/af_x25.c +@@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) + return rc; + } + +-static int x25_accept(struct socket *sock, struct socket *newsock, int flags, +- bool kern) ++static int x25_accept(struct socket *sock, struct socket *newsock, ++ struct proto_accept_arg *arg) + { + struct sock *sk = sock->sk; + struct sock *newsk; +-- +2.43.0 + diff --git a/queue-6.6/net-dsa-qca8k-fix-usages-of-device_get_named_child_n.patch b/queue-6.6/net-dsa-qca8k-fix-usages-of-device_get_named_child_n.patch new file mode 100644 index 00000000000..28aec3a947e --- /dev/null +++ b/queue-6.6/net-dsa-qca8k-fix-usages-of-device_get_named_child_n.patch @@ -0,0 +1,71 @@ +From 5489dda6813e0ef33bd0afbaa35e4a4596747379 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jun 2024 19:13:03 +0300 +Subject: net dsa: qca8k: fix usages of device_get_named_child_node() + +From: Andy Shevchenko + +[ Upstream commit d029edefed39647c797c2710aedd9d31f84c069e ] + +The documentation for device_get_named_child_node() mentions this +important point: + +" +The caller is responsible for calling fwnode_handle_put() on the +returned fwnode pointer. +" + +Add fwnode_handle_put() to avoid leaked references. + +Fixes: 1e264f9d2918 ("net: dsa: qca8k: add LEDs basic support") +Reviewed-by: Simon Horman +Signed-off-by: Andy Shevchenko +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/qca/qca8k-leds.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c +index e8c16e76e34bb..77a79c2494022 100644 +--- a/drivers/net/dsa/qca/qca8k-leds.c ++++ b/drivers/net/dsa/qca/qca8k-leds.c +@@ -431,8 +431,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p + init_data.devname_mandatory = true; + init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->slave_mii_bus->id, + port_num); +- if (!init_data.devicename) ++ if (!init_data.devicename) { ++ fwnode_handle_put(led); ++ fwnode_handle_put(leds); + return -ENOMEM; ++ } + + ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data); + if (ret) +@@ -441,6 +444,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p + kfree(init_data.devicename); + } + ++ fwnode_handle_put(leds); + return 0; + } + +@@ -471,9 +475,13 @@ qca8k_setup_led_ctrl(struct qca8k_priv *priv) + * the correct port for LED setup. 
+ */ + ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num)); +- if (ret) ++ if (ret) { ++ fwnode_handle_put(port); ++ fwnode_handle_put(ports); + return ret; ++ } + } + ++ fwnode_handle_put(ports); + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/net-hns3-add-cond_resched-to-hns3-ring-buffer-init-p.patch b/queue-6.6/net-hns3-add-cond_resched-to-hns3-ring-buffer-init-p.patch new file mode 100644 index 00000000000..89be6ef7a75 --- /dev/null +++ b/queue-6.6/net-hns3-add-cond_resched-to-hns3-ring-buffer-init-p.patch @@ -0,0 +1,64 @@ +From 9ea1bfc5ffe9f665a26e6acb3ff0f6bba75fb187 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 15:20:58 +0800 +Subject: net: hns3: add cond_resched() to hns3 ring buffer init process + +From: Jie Wang + +[ Upstream commit 968fde83841a8c23558dfbd0a0c69d636db52b55 ] + +Currently hns3 ring buffer init process would hold cpu too long with big +Tx/Rx ring depth. This could cause soft lockup. + +So this patch adds cond_resched() to the process. Then cpu can break to +run other tasks instead of busy looping. + +Fixes: a723fb8efe29 ("net: hns3: refine for set ring parameters") +Signed-off-by: Jie Wang +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++++ + drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index 677cfaa5fe08c..db9574e9fb7bc 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -3539,6 +3539,9 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) + ret = hns3_alloc_and_attach_buffer(ring, i); + if (ret) + goto out_buffer_fail; ++ ++ if (!(i % HNS3_RESCHED_BD_NUM)) ++ cond_resched(); + } + + return 0; +@@ -5112,6 +5115,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) + } + + u64_stats_init(&priv->ring[i].syncp); ++ cond_resched(); + } + + return 0; +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +index acd756b0c7c9a..d36c4ed16d8dd 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +@@ -214,6 +214,8 @@ enum hns3_nic_state { + #define HNS3_CQ_MODE_EQE 1U + #define HNS3_CQ_MODE_CQE 0U + ++#define HNS3_RESCHED_BD_NUM 1024 ++ + enum hns3_pkt_l2t_type { + HNS3_L2_TYPE_UNICAST, + HNS3_L2_TYPE_MULTICAST, +-- +2.43.0 + diff --git a/queue-6.6/net-hns3-fix-kernel-crash-problem-in-concurrent-scen.patch b/queue-6.6/net-hns3-fix-kernel-crash-problem-in-concurrent-scen.patch new file mode 100644 index 00000000000..e82ab87dc62 --- /dev/null +++ b/queue-6.6/net-hns3-fix-kernel-crash-problem-in-concurrent-scen.patch @@ -0,0 +1,84 @@ +From 865b8f434110c47eb8d9b0edf2e548b1434798dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 15:20:57 +0800 +Subject: net: hns3: fix kernel crash problem in concurrent scenario + +From: Yonglong Liu + +[ Upstream commit 12cda920212a49fa22d9e8b9492ac4ea013310a4 ] + +When link status change, the nic driver 
need to notify the roce +driver to handle this event, but at this time, the roce driver +may uninit, then cause kernel crash. + +To fix the problem, when link status change, need to check +whether the roce registered, and when uninit, need to wait link +update finish. + +Fixes: 45e92b7e4e27 ("net: hns3: add calling roce callback function when link status change") +Signed-off-by: Yonglong Liu +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++++++----- + 1 file changed, 16 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 14713454e0d82..c8059d96f64be 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -3031,9 +3031,7 @@ static void hclge_push_link_status(struct hclge_dev *hdev) + + static void hclge_update_link_status(struct hclge_dev *hdev) + { +- struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; +- struct hnae3_client *rclient = hdev->roce_client; + struct hnae3_client *client = hdev->nic_client; + int state; + int ret; +@@ -3057,8 +3055,15 @@ static void hclge_update_link_status(struct hclge_dev *hdev) + + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); +- if (rclient && rclient->ops->link_status_change) +- rclient->ops->link_status_change(rhandle, state); ++ ++ if (test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state)) { ++ struct hnae3_handle *rhandle = &hdev->vport[0].roce; ++ struct hnae3_client *rclient = hdev->roce_client; ++ ++ if (rclient && rclient->ops->link_status_change) ++ rclient->ops->link_status_change(rhandle, ++ state); ++ } + + hclge_push_link_status(hdev); + } +@@ -11233,6 +11238,12 @@ static int hclge_init_client_instance(struct 
hnae3_client *client, + return ret; + } + ++static bool hclge_uninit_need_wait(struct hclge_dev *hdev) ++{ ++ return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || ++ test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); ++} ++ + static void hclge_uninit_client_instance(struct hnae3_client *client, + struct hnae3_ae_dev *ae_dev) + { +@@ -11241,7 +11252,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, + + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); +- while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) ++ while (hclge_uninit_need_wait(hdev)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); +-- +2.43.0 + diff --git a/queue-6.6/net-ipv6-fix-the-rt-cache-flush-via-sysctl-using-a-p.patch b/queue-6.6/net-ipv6-fix-the-rt-cache-flush-via-sysctl-using-a-p.patch new file mode 100644 index 00000000000..4ba5921bd00 --- /dev/null +++ b/queue-6.6/net-ipv6-fix-the-rt-cache-flush-via-sysctl-using-a-p.patch @@ -0,0 +1,53 @@ +From 8c1204492d5c2a7a2d3bf9970b9a47ee70dc957d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jun 2024 13:28:28 +0200 +Subject: net/ipv6: Fix the RT cache flush via sysctl using a previous delay + +From: Petr Pavlu + +[ Upstream commit 14a20e5b4ad998793c5f43b0330d9e1388446cf3 ] + +The net.ipv6.route.flush system parameter takes a value which specifies +a delay used during the flush operation for aging exception routes. The +written value is however not used in the currently requested flush and +instead utilized only in the next one. + +A problem is that ipv6_sysctl_rtcache_flush() first reads the old value +of net->ipv6.sysctl.flush_delay into a local delay variable and then +calls proc_dointvec() which actually updates the sysctl based on the +provided input. + +Fix the problem by switching the order of the two operations. 
+ +Fixes: 4990509f19e8 ("[NETNS][IPV6]: Make sysctls route per namespace.") +Signed-off-by: Petr Pavlu +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20240607112828.30285-1-petr.pavlu@suse.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv6/route.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 0a37f04177337..29fa2ca07b46a 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -6332,12 +6332,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, + if (!write) + return -EINVAL; + +- net = (struct net *)ctl->extra1; +- delay = net->ipv6.sysctl.flush_delay; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (ret) + return ret; + ++ net = (struct net *)ctl->extra1; ++ delay = net->ipv6.sysctl.flush_delay; + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-fix-features-validation-check-for-tunneled.patch b/queue-6.6/net-mlx5e-fix-features-validation-check-for-tunneled.patch new file mode 100644 index 00000000000..b75d5ece535 --- /dev/null +++ b/queue-6.6/net-mlx5e-fix-features-validation-check-for-tunneled.patch @@ -0,0 +1,53 @@ +From 3d41d976429053eebe9e04304ed7d490349b70b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jun 2024 23:32:49 +0300 +Subject: net/mlx5e: Fix features validation check for tunneled UDP (non-VXLAN) + packets + +From: Gal Pressman + +[ Upstream commit 791b4089e326271424b78f2fae778b20e53d071b ] + +Move the vxlan_features_check() call to after we verified the packet is +a tunneled VXLAN packet. + +Without this, tunneled UDP non-VXLAN packets (for ex. GENEVE) might +wrongly not get offloaded. +In some cases, it worked by chance as GENEVE header is the same size as +VXLAN, but it is obviously incorrect. 
+ +Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support") +Signed-off-by: Gal Pressman +Reviewed-by: Dragos Tatulea +Signed-off-by: Tariq Toukan +Reviewed-by: Wojciech Drewek +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 455907b1167a0..e87a776ea2bfd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4704,7 +4704,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + + /* Verify if UDP port is being offloaded by HW */ + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) +- return features; ++ return vxlan_features_check(skb, features); + + #if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ +@@ -4730,7 +4730,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); +- features = vxlan_features_check(skb, features); + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && +-- +2.43.0 + diff --git a/queue-6.6/net-pse-pd-use-eopnotsupp-error-code-instead-of-enot.patch b/queue-6.6/net-pse-pd-use-eopnotsupp-error-code-instead-of-enot.patch new file mode 100644 index 00000000000..5bb4b19d5ba --- /dev/null +++ b/queue-6.6/net-pse-pd-use-eopnotsupp-error-code-instead-of-enot.patch @@ -0,0 +1,47 @@ +From fe03e1eab305733244d8d9d382ceab7381f948c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 10:34:26 +0200 +Subject: net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP + +From: Kory Maincent + +[ Upstream commit 144ba8580bcb82b2686c3d1a043299d844b9a682 ] + +ENOTSUPP is not a SUSV4 error code, 
prefer EOPNOTSUPP as reported by +checkpatch script. + +Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment") +Reviewed-by: Andrew Lunn +Acked-by: Oleksij Rempel +Signed-off-by: Kory Maincent +Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/linux/pse-pd/pse.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h +index fb724c65c77bc..5ce0cd76956e0 100644 +--- a/include/linux/pse-pd/pse.h ++++ b/include/linux/pse-pd/pse.h +@@ -114,14 +114,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + + static inline int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config *config) + { +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + } + + #endif +-- +2.43.0 + diff --git a/queue-6.6/net-sfp-always-call-sfp_sm_mod_remove-on-remove.patch b/queue-6.6/net-sfp-always-call-sfp_sm_mod_remove-on-remove.patch new file mode 100644 index 00000000000..7c0280bdd17 --- /dev/null +++ b/queue-6.6/net-sfp-always-call-sfp_sm_mod_remove-on-remove.patch @@ -0,0 +1,45 @@ +From 292c8929fb2b60dd3ff11e4d5efc08f3e9c84667 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 10:42:51 +0200 +Subject: net: sfp: Always call `sfp_sm_mod_remove()` on remove +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Csókás, Bence + +[ Upstream commit e96b2933152fd87b6a41765b2f58b158fde855b6 ] + +If the module is in SFP_MOD_ERROR, `sfp_sm_mod_remove()` will +not be run. As a consequence, `sfp_hwmon_remove()` is not getting +run either, leaving a stale `hwmon` device behind. 
`sfp_sm_mod_remove()` +itself checks `sfp->sm_mod_state` anyways, so this check was not +really needed in the first place. + +Fixes: d2e816c0293f ("net: sfp: handle module remove outside state machine") +Signed-off-by: "Csókás, Bence" +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20240605084251.63502-1-csokas.bence@prolan.hu +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/sfp.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c +index 3679a43f4eb02..8152e14250f2d 100644 +--- a/drivers/net/phy/sfp.c ++++ b/drivers/net/phy/sfp.c +@@ -2394,8 +2394,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) + + /* Handle remove event globally, it resets this state machine */ + if (event == SFP_E_REMOVE) { +- if (sfp->sm_mod_state > SFP_MOD_PROBE) +- sfp_sm_mod_remove(sfp); ++ sfp_sm_mod_remove(sfp); + sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); + return; + } +-- +2.43.0 + diff --git a/queue-6.6/net-stmmac-dwmac-qcom-ethqos-configure-host-dma-widt.patch b/queue-6.6/net-stmmac-dwmac-qcom-ethqos-configure-host-dma-widt.patch new file mode 100644 index 00000000000..4b9a84eb5cc --- /dev/null +++ b/queue-6.6/net-stmmac-dwmac-qcom-ethqos-configure-host-dma-widt.patch @@ -0,0 +1,64 @@ +From 2399f0672147bdf368db28911cc19b6c536bb5a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 11:57:18 -0700 +Subject: net: stmmac: dwmac-qcom-ethqos: Configure host DMA width + +From: Sagar Cheluvegowda + +[ Upstream commit 0579f27249047006a818e463ee66a6c314d04cea ] + +Commit 070246e4674b ("net: stmmac: Fix for mismatched host/device DMA +address width") added support in the stmmac driver for platform drivers +to indicate the host DMA width, but left it up to authors of the +specific platforms to indicate if their width differed from the addr64 +register read from the MAC itself. 
+ +Qualcomm's EMAC4 integration supports only up to 36 bit width (as +opposed to the addr64 register indicating 40 bit width). Let's indicate +that in the platform driver to avoid a scenario where the driver will +allocate descriptors of size that is supported by the CPU which in our +case is 36 bit, but as the addr64 register is still capable of 40 bits +the device will use two descriptors as one address. + +Fixes: 8c4d92e82d50 ("net: stmmac: dwmac-qcom-ethqos: add support for emac4 on sa8775p platforms") +Signed-off-by: Sagar Cheluvegowda +Reviewed-by: Simon Horman +Reviewed-by: Andrew Halaney +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +index 31631e3f89d0a..51ff53120307a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +@@ -93,6 +93,7 @@ struct ethqos_emac_driver_data { + bool has_emac_ge_3; + const char *link_clk_name; + bool has_integrated_pcs; ++ u32 dma_addr_width; + struct dwmac4_addrs dwmac4_addrs; + }; + +@@ -272,6 +273,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { + .has_emac_ge_3 = true, + .link_clk_name = "phyaux", + .has_integrated_pcs = true, ++ .dma_addr_width = 36, + .dwmac4_addrs = { + .dma_chan = 0x00008100, + .dma_chan_offset = 0x1000, +@@ -816,6 +818,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) + plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; + if (data->has_integrated_pcs) + plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; ++ if (data->dma_addr_width) ++ plat_dat->host_dma_width = data->dma_addr_width; + + if (ethqos->serdes_phy) { + plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; +-- +2.43.0 + diff --git 
a/queue-6.6/net-stmmac-replace-priv-speed-with-the-porttransmitr.patch b/queue-6.6/net-stmmac-replace-priv-speed-with-the-porttransmitr.patch new file mode 100644 index 00000000000..23a33b0665e --- /dev/null +++ b/queue-6.6/net-stmmac-replace-priv-speed-with-the-porttransmitr.patch @@ -0,0 +1,101 @@ +From 4cc12ca20076874dd22696a2a2c628e758f6e7a2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Jun 2024 22:35:24 +0800 +Subject: net: stmmac: replace priv->speed with the portTransmitRate from the + tc-cbs parameters + +From: Xiaolei Wang + +[ Upstream commit be27b896529787e23a35ae4befb6337ce73fcca0 ] + +The current cbs parameter depends on speed after uplinking, +which is not needed and will report a configuration error +if the port is not initially connected. The UAPI exposed by +tc-cbs requires userspace to recalculate the send slope anyway, +because the formula depends on port_transmit_rate (see man tc-cbs), +which is not an invariant from tc's perspective. Therefore, we +use offload->sendslope and offload->idleslope to derive the +original port_transmit_rate from the CBS formula. 
+ +Fixes: 1f705bc61aee ("net: stmmac: Add support for CBS QDISC") +Signed-off-by: Xiaolei Wang +Reviewed-by: Wojciech Drewek +Reviewed-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20240608143524.2065736-1-xiaolei.wang@windriver.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 25 ++++++++----------- + 1 file changed, 11 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +index 6ad3e0a119366..2467598f9d92f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +@@ -343,10 +343,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + struct tc_cbs_qopt_offload *qopt) + { + u32 tx_queues_count = priv->plat->tx_queues_to_use; ++ s64 port_transmit_rate_kbps; + u32 queue = qopt->queue; +- u32 ptr, speed_div; + u32 mode_to_use; + u64 value; ++ u32 ptr; + int ret; + + /* Queue 0 is not AVB capable */ +@@ -355,30 +356,26 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + if (!priv->dma_cap.av) + return -EOPNOTSUPP; + ++ port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; ++ + /* Port Transmit Rate and Speed Divider */ +- switch (priv->speed) { ++ switch (div_s64(port_transmit_rate_kbps, 1000)) { + case SPEED_10000: +- ptr = 32; +- speed_div = 10000000; +- break; + case SPEED_5000: + ptr = 32; +- speed_div = 5000000; + break; + case SPEED_2500: +- ptr = 8; +- speed_div = 2500000; +- break; + case SPEED_1000: + ptr = 8; +- speed_div = 1000000; + break; + case SPEED_100: + ptr = 4; +- speed_div = 100000; + break; + default: +- return -EOPNOTSUPP; ++ netdev_err(priv->dev, ++ "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", ++ port_transmit_rate_kbps); ++ return -EINVAL; + } + + mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; +@@ -398,10 +395,10 @@ static int tc_setup_cbs(struct stmmac_priv *priv, + } + + /* Final 
adjustments for HW */ +- value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); ++ value = div_s64(qopt->idleslope * 1024ll * ptr, port_transmit_rate_kbps); + priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); + +- value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div); ++ value = div_s64(-qopt->sendslope * 1024ll * ptr, port_transmit_rate_kbps); + priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0); + + value = qopt->hicredit * 1024ll * 8; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-ipset-fix-race-between-namespace-cleanup-a.patch b/queue-6.6/netfilter-ipset-fix-race-between-namespace-cleanup-a.patch new file mode 100644 index 00000000000..a31aa1dba12 --- /dev/null +++ b/queue-6.6/netfilter-ipset-fix-race-between-namespace-cleanup-a.patch @@ -0,0 +1,289 @@ +From 157e378b451b374cc6000b72dfbeda66a82e08cc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Jun 2024 15:58:03 +0200 +Subject: netfilter: ipset: Fix race between namespace cleanup and gc in the + list:set type + +From: Jozsef Kadlecsik + +[ Upstream commit 4e7aaa6b82d63e8ddcbfb56b4fd3d014ca586f10 ] + +Lion Ackermann reported that there is a race condition between namespace cleanup +in ipset and the garbage collection of the list:set type. The namespace +cleanup can destroy the list:set type of sets while the gc of the set type is +waiting to run in rcu cleanup. The latter uses data from the destroyed set which +thus leads to use after free. The patch contains the following parts: + +- When destroying all sets, first remove the garbage collectors, then wait + if needed and then destroy the sets. +- Fix the badly ordered "wait then remove gc" for the destroy a single set + case. +- Fix the missing rcu locking in the list:set type in the userspace test + case. +- Use proper RCU list handlings in the list:set type. + +The patch depends on c1193d9bbbd3 (netfilter: ipset: Add list flush to cancel_gc). 
+ +Fixes: 97f7cf1cd80e (netfilter: ipset: fix performance regression in swap operation) +Reported-by: Lion Ackermann +Tested-by: Lion Ackermann +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_core.c | 81 +++++++++++++++------------ + net/netfilter/ipset/ip_set_list_set.c | 30 +++++----- + 2 files changed, 60 insertions(+), 51 deletions(-) + +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index 3184cc6be4c9d..c7ae4d9bf3d24 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { + .len = IPSET_MAXNAMELEN - 1 }, + }; + ++/* In order to return quickly when destroying a single set, it is split ++ * into two stages: ++ * - Cancel garbage collector ++ * - Destroy the set itself via call_rcu() ++ */ ++ + static void +-ip_set_destroy_set(struct ip_set *set) ++ip_set_destroy_set_rcu(struct rcu_head *head) + { +- pr_debug("set: %s\n", set->name); ++ struct ip_set *set = container_of(head, struct ip_set, rcu); + +- /* Must call it without holding any lock */ + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + + static void +-ip_set_destroy_set_rcu(struct rcu_head *head) ++_destroy_all_sets(struct ip_set_net *inst) + { +- struct ip_set *set = container_of(head, struct ip_set, rcu); ++ struct ip_set *set; ++ ip_set_id_t i; ++ bool need_wait = false; + +- ip_set_destroy_set(set); ++ /* First cancel gc's: set:list sets are flushed as well */ ++ for (i = 0; i < inst->ip_set_max; i++) { ++ set = ip_set(inst, i); ++ if (set) { ++ set->variant->cancel_gc(set); ++ if (set->type->features & IPSET_TYPE_NAME) ++ need_wait = true; ++ } ++ } ++ /* Must wait for flush to be really finished */ ++ if (need_wait) ++ rcu_barrier(); ++ for (i = 0; i < inst->ip_set_max; i++) { ++ set = ip_set(inst, i); ++ if (set) { ++ ip_set(inst, i) = NULL; ++ 
set->variant->destroy(set); ++ module_put(set->type->me); ++ kfree(set); ++ } ++ } + } + + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, +@@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + if (unlikely(protocol_min_failed(attr))) + return -IPSET_ERR_PROTOCOL; + +- + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call +- * ip_set_put|get_nfnl_* functions, that way we ++ * ip_set_nfnl_get_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference +@@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + * without holding the lock. + */ + if (!attr[IPSET_ATTR_SETNAME]) { +- /* Must wait for flush to be really finished in list:set */ +- rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); + for (i = 0; i < inst->ip_set_max; i++) { + s = ip_set(inst, i); +@@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + } + inst->is_destroyed = true; + read_unlock_bh(&ip_set_ref_lock); +- for (i = 0; i < inst->ip_set_max; i++) { +- s = ip_set(inst, i); +- if (s) { +- ip_set(inst, i) = NULL; +- /* Must cancel garbage collectors */ +- s->variant->cancel_gc(s); +- ip_set_destroy_set(s); +- } +- } ++ _destroy_all_sets(inst); + /* Modified by ip_set_destroy() only, which is serialized */ + inst->is_destroyed = false; + } else { +@@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, + features = s->type->features; + ip_set(inst, i) = NULL; + read_unlock_bh(&ip_set_ref_lock); ++ /* Must cancel garbage collectors */ ++ s->variant->cancel_gc(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } +- /* Must cancel garbage collectors */ +- s->variant->cancel_gc(s); + call_rcu(&s->rcu, 
ip_set_destroy_set_rcu); + } + return 0; +@@ -2365,30 +2381,25 @@ ip_set_net_init(struct net *net) + } + + static void __net_exit +-ip_set_net_exit(struct net *net) ++ip_set_net_pre_exit(struct net *net) + { + struct ip_set_net *inst = ip_set_pernet(net); + +- struct ip_set *set = NULL; +- ip_set_id_t i; +- + inst->is_deleted = true; /* flag for ip_set_nfnl_put */ ++} + +- nfnl_lock(NFNL_SUBSYS_IPSET); +- for (i = 0; i < inst->ip_set_max; i++) { +- set = ip_set(inst, i); +- if (set) { +- ip_set(inst, i) = NULL; +- set->variant->cancel_gc(set); +- ip_set_destroy_set(set); +- } +- } +- nfnl_unlock(NFNL_SUBSYS_IPSET); ++static void __net_exit ++ip_set_net_exit(struct net *net) ++{ ++ struct ip_set_net *inst = ip_set_pernet(net); ++ ++ _destroy_all_sets(inst); + kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); + } + + static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, ++ .pre_exit = ip_set_net_pre_exit, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net), +diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c +index 54e2a1dd7f5f5..bfae7066936bb 100644 +--- a/net/netfilter/ipset/ip_set_list_set.c ++++ b/net/netfilter/ipset/ip_set_list_set.c +@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, + struct set_elem *e; + int ret; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, + struct set_elem *e; + int ret; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct list_set *map = 
set->data; + struct set_adt_elem *d = value; + struct set_elem *e, *next, *prev = NULL; +- int ret; ++ int ret = 0; + +- list_for_each_entry(e, &map->members, list) { ++ rcu_read_lock(); ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + + if (d->before == 0) { + ret = 1; ++ goto out; + } else if (d->before > 0) { + next = list_next_entry(e, list); + ret = !list_is_last(&e->list, &map->members) && +@@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, + } else { + ret = prev && prev->id == d->refid; + } +- return ret; ++ goto out; + } +- return 0; ++out: ++ rcu_read_unlock(); ++ return ret; + } + + static void +@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, + + /* Find where to add the new entry */ + n = prev = next = NULL; +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_rcu(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, + { + struct list_set *map = set->data; + struct set_adt_elem *d = value; +- struct set_elem *e, *next, *prev = NULL; ++ struct set_elem *e, *n, *next, *prev = NULL; + +- list_for_each_entry(e, &map->members, list) { ++ list_for_each_entry_safe(e, n, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) + continue; +@@ -424,14 +428,8 @@ static void + list_set_destroy(struct ip_set *set) + { + struct list_set *map = set->data; +- struct set_elem *e, *n; + +- list_for_each_entry_safe(e, n, &map->members, list) { +- list_del(&e->list); +- ip_set_put_byindex(map->net, e->id); +- ip_set_ext_destroy(set, e); +- kfree(e); +- } ++ 
WARN_ON_ONCE(!list_empty(&map->members)); + kfree(map); + + set->data = NULL; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nft_inner-validate-mandatory-meta-and-payl.patch b/queue-6.6/netfilter-nft_inner-validate-mandatory-meta-and-payl.patch new file mode 100644 index 00000000000..bd59892b948 --- /dev/null +++ b/queue-6.6/netfilter-nft_inner-validate-mandatory-meta-and-payl.patch @@ -0,0 +1,55 @@ +From 73ea0c0d69e04c390c33f7d26ea141079d96ebe9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 13:03:45 +0200 +Subject: netfilter: nft_inner: validate mandatory meta and payload + +From: Davide Ornaghi + +[ Upstream commit c4ab9da85b9df3692f861512fe6c9812f38b7471 ] + +Check for mandatory netlink attributes in payload and meta expression +when used embedded from the inner expression, otherwise NULL pointer +dereference is possible from userspace. + +Fixes: a150d122b6bd ("netfilter: nft_meta: add inner match support") +Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") +Signed-off-by: Davide Ornaghi +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_meta.c | 3 +++ + net/netfilter/nft_payload.c | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c +index ba0d3683a45d3..9139ce38ea7b9 100644 +--- a/net/netfilter/nft_meta.c ++++ b/net/netfilter/nft_meta.c +@@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + ++ if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) ++ return -EINVAL; ++ + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_PROTOCOL: +diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c +index 0c43d748e23ae..50429cbd42da4 100644 +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx 
*ctx, + struct nft_payload *priv = nft_expr_priv(expr); + u32 base; + ++ if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || ++ !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) ++ return -EINVAL; ++ + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_TUN_HEADER: +-- +2.43.0 + diff --git a/queue-6.6/netfilter-use-flowlabel-flow-key-when-re-routing-man.patch b/queue-6.6/netfilter-use-flowlabel-flow-key-when-re-routing-man.patch new file mode 100644 index 00000000000..e290cf06e02 --- /dev/null +++ b/queue-6.6/netfilter-use-flowlabel-flow-key-when-re-routing-man.patch @@ -0,0 +1,41 @@ +From b56061547b2a20efecb96084e4b6e7f33ecf28bc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jun 2024 12:23:31 +0200 +Subject: netfilter: Use flowlabel flow key when re-routing mangled packets + +From: Florian Westphal + +[ Upstream commit 6f8f132cc7bac2ac76911e47d5baa378aafda4cb ] + +'ip6 dscp set $v' in an nftables outpute route chain has no effect. +While nftables does detect the dscp change and calls the reroute hook. +But ip6_route_me_harder never sets the dscp/flowlabel: +flowlabel/dsfield routing rules are ignored and no reroute takes place. + +Thanks to Yi Chen for an excellent reproducer script that I used +to validate this change. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Yi Chen +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/ipv6/netfilter.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c +index 857713d7a38a5..1df23cd31b767 100644 +--- a/net/ipv6/netfilter.c ++++ b/net/ipv6/netfilter.c +@@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff + .flowi6_uid = sock_net_uid(net, sk), + .daddr = iph->daddr, + .saddr = iph->saddr, ++ .flowlabel = ip6_flowinfo(iph), + }; + int err; + +-- +2.43.0 + diff --git a/queue-6.6/nfs-add-barriers-when-testing-for-nfs_fsdata_blocked.patch b/queue-6.6/nfs-add-barriers-when-testing-for-nfs_fsdata_blocked.patch new file mode 100644 index 00000000000..e4edc0f6108 --- /dev/null +++ b/queue-6.6/nfs-add-barriers-when-testing-for-nfs_fsdata_blocked.patch @@ -0,0 +1,151 @@ +From 5b0f5de678e850752164adaa3c43b396f1a5db97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 13:27:17 +1000 +Subject: NFS: add barriers when testing for NFS_FSDATA_BLOCKED + +From: NeilBrown + +[ Upstream commit 99bc9f2eb3f79a2b4296d9bf43153e1d10ca50d3 ] + +dentry->d_fsdata is set to NFS_FSDATA_BLOCKED while unlinking or +renaming-over a file to ensure that no open succeeds while the NFS +operation progressed on the server. + +Setting dentry->d_fsdata to NFS_FSDATA_BLOCKED is done under ->d_lock +after checking the refcount is not elevated. Any attempt to open the +file (through that name) will go through lookup_open() which will take +->d_lock while incrementing the refcount, we can be sure that once the +new value is set, __nfs_lookup_revalidate() *will* see the new value and +will block. + +We don't have any locking guarantee that when we set ->d_fsdata to NULL, +the wait_var_event() in __nfs_lookup_revalidate() will notice. +wait/wake primitives do NOT provide barriers to guarantee order. 
We +must use smp_load_acquire() in wait_var_event() to ensure we look at an +up-to-date value, and must use smp_store_release() before wake_up_var(). + +This patch adds those barrier functions and factors out +block_revalidate() and unblock_revalidate() for clarity. + +There is also a hypothetical bug in that if memory allocation fails +(which never happens in practice) we might leave ->d_fsdata locked. +This patch adds the missing call to unblock_revalidate(). + +Reported-and-tested-by: Richard Kojedzinszky +Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1071501 +Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") +Signed-off-by: NeilBrown +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/dir.c | 47 ++++++++++++++++++++++++++++++++--------------- + 1 file changed, 32 insertions(+), 15 deletions(-) + +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 9fc5061d51b2f..2a0f069d5a096 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -1802,9 +1802,10 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, + if (parent != READ_ONCE(dentry->d_parent)) + return -ECHILD; + } else { +- /* Wait for unlink to complete */ ++ /* Wait for unlink to complete - see unblock_revalidate() */ + wait_var_event(&dentry->d_fsdata, +- dentry->d_fsdata != NFS_FSDATA_BLOCKED); ++ smp_load_acquire(&dentry->d_fsdata) ++ != NFS_FSDATA_BLOCKED); + parent = dget_parent(dentry); + ret = reval(d_inode(parent), dentry, flags); + dput(parent); +@@ -1817,6 +1818,29 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); + } + ++static void block_revalidate(struct dentry *dentry) ++{ ++ /* old devname - just in case */ ++ kfree(dentry->d_fsdata); ++ ++ /* Any new reference that could lead to an open ++ * will take ->d_lock in lookup_open() -> d_lookup(). 
++ * Holding this lock ensures we cannot race with ++ * __nfs_lookup_revalidate() and removes and need ++ * for further barriers. ++ */ ++ lockdep_assert_held(&dentry->d_lock); ++ ++ dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++} ++ ++static void unblock_revalidate(struct dentry *dentry) ++{ ++ /* store_release ensures wait_var_event() sees the update */ ++ smp_store_release(&dentry->d_fsdata, NULL); ++ wake_up_var(&dentry->d_fsdata); ++} ++ + /* + * A weaker form of d_revalidate for revalidating just the d_inode(dentry) + * when we don't really care about the dentry name. This is called when a +@@ -2499,15 +2523,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) + spin_unlock(&dentry->d_lock); + goto out; + } +- /* old devname */ +- kfree(dentry->d_fsdata); +- dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++ block_revalidate(dentry); + + spin_unlock(&dentry->d_lock); + error = nfs_safe_remove(dentry); + nfs_dentry_remove_handle_error(dir, dentry, error); +- dentry->d_fsdata = NULL; +- wake_up_var(&dentry->d_fsdata); ++ unblock_revalidate(dentry); + out: + trace_nfs_unlink_exit(dir, dentry, error); + return error; +@@ -2619,8 +2640,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) + { + struct dentry *new_dentry = data->new_dentry; + +- new_dentry->d_fsdata = NULL; +- wake_up_var(&new_dentry->d_fsdata); ++ unblock_revalidate(new_dentry); + } + + /* +@@ -2682,11 +2702,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || + WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + goto out; +- if (new_dentry->d_fsdata) { +- /* old devname */ +- kfree(new_dentry->d_fsdata); +- new_dentry->d_fsdata = NULL; +- } + + spin_lock(&new_dentry->d_lock); + if (d_count(new_dentry) > 2) { +@@ -2708,7 +2723,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + new_dentry = dentry; + new_inode = NULL; + } else { +- new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; ++ 
block_revalidate(new_dentry); + must_unblock = true; + spin_unlock(&new_dentry->d_lock); + } +@@ -2720,6 +2735,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, + must_unblock ? nfs_unblock_rename : NULL); + if (IS_ERR(task)) { ++ if (must_unblock) ++ unblock_revalidate(new_dentry); + error = PTR_ERR(task); + goto out; + } +-- +2.43.0 + diff --git a/queue-6.6/nfsv4.1-enforce-rootpath-check-in-fs_location-query.patch b/queue-6.6/nfsv4.1-enforce-rootpath-check-in-fs_location-query.patch new file mode 100644 index 00000000000..1ee248bd254 --- /dev/null +++ b/queue-6.6/nfsv4.1-enforce-rootpath-check-in-fs_location-query.patch @@ -0,0 +1,75 @@ +From 3d3cc7525df822588be50b833b503b6beedd50df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 15:44:35 -0400 +Subject: NFSv4.1 enforce rootpath check in fs_location query + +From: Olga Kornievskaia + +[ Upstream commit 28568c906c1bb5f7560e18082ed7d6295860f1c2 ] + +In commit 4ca9f31a2be66 ("NFSv4.1 test and add 4.1 trunking transport"), +we introduce the ability to query the NFS server for possible trunking +locations of the existing filesystem. However, we never checked the +returned file system path for these alternative locations. According +to the RFC, the server can say that the filesystem currently known +under "fs_root" of fs_location also resides under these server +locations under the following "rootpath" pathname. The client cannot +handle trunking a filesystem that reside under different location +under different paths other than what the main path is. This patch +enforces the check that fs_root path and rootpath path in fs_location +reply is the same. 
+ +Fixes: 4ca9f31a2be6 ("NFSv4.1 test and add 4.1 trunking transport") +Signed-off-by: Olga Kornievskaia +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 41b7eafbd9287..f0953200acd08 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -4003,6 +4003,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, + } + } + ++static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, ++ struct nfs4_pathname *path2) ++{ ++ int i; ++ ++ if (path1->ncomponents != path2->ncomponents) ++ return false; ++ for (i = 0; i < path1->ncomponents; i++) { ++ if (path1->components[i].len != path2->components[i].len) ++ return false; ++ if (memcmp(path1->components[i].data, path2->components[i].data, ++ path1->components[i].len)) ++ return false; ++ } ++ return true; ++} ++ + static int _nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) + { +@@ -4036,9 +4053,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, + if (status) + goto out_free_3; + +- for (i = 0; i < locations->nlocations; i++) ++ for (i = 0; i < locations->nlocations; i++) { ++ if (!_is_same_nfs4_pathname(&locations->fs_path, ++ &locations->locations[i].rootpath)) ++ continue; + test_fs_location_for_trunking(&locations->locations[i], clp, + server); ++ } + out_free_3: + kfree(locations->fattr); + out_free_2: +-- +2.43.0 + diff --git a/queue-6.6/nvme-fix-nvme_pr_-status-code-parsing.patch b/queue-6.6/nvme-fix-nvme_pr_-status-code-parsing.patch new file mode 100644 index 00000000000..15de27709a1 --- /dev/null +++ b/queue-6.6/nvme-fix-nvme_pr_-status-code-parsing.patch @@ -0,0 +1,35 @@ +From 8c2d12e2c490514d7add41e56c9453545f90c69c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 14:16:46 +0800 +Subject: nvme: fix nvme_pr_* status code parsing + +From: Weiwen 
Hu + +[ Upstream commit b1a1fdd7096dd2d67911b07f8118ff113d815db4 ] + +Fix the parsing if extra status bits (e.g. MORE) are present. + +Fixes: 7fb42780d06c ("nvme: Convert NVMe errors to PR errors") +Signed-off-by: Weiwen Hu +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c +index 391b1465ebfd5..803efc97fd1ea 100644 +--- a/drivers/nvme/host/pr.c ++++ b/drivers/nvme/host/pr.c +@@ -77,7 +77,7 @@ static int nvme_sc_to_pr_err(int nvme_sc) + if (nvme_is_path_error(nvme_sc)) + return PR_STS_PATH_FAILED; + +- switch (nvme_sc) { ++ switch (nvme_sc & 0x7ff) { + case NVME_SC_SUCCESS: + return PR_STS_SUCCESS; + case NVME_SC_RESERVATION_CONFLICT: +-- +2.43.0 + diff --git a/queue-6.6/nvmet-passthru-propagate-status-from-id-override-fun.patch b/queue-6.6/nvmet-passthru-propagate-status-from-id-override-fun.patch new file mode 100644 index 00000000000..3c6fcc76e08 --- /dev/null +++ b/queue-6.6/nvmet-passthru-propagate-status-from-id-override-fun.patch @@ -0,0 +1,46 @@ +From 8cb990f73a6780380043520e2d75bc94c8251fdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jun 2024 16:02:40 +0200 +Subject: nvmet-passthru: propagate status from id override functions + +From: Daniel Wagner + +[ Upstream commit d76584e53f4244dbc154bec447c3852600acc914 ] + +The id override functions return a status which is not propagated to the +caller. 
+ +Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") +Signed-off-by: Daniel Wagner +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Christoph Hellwig +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/passthru.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c +index 9fe07d7efa96c..d4a61645d61a5 100644 +--- a/drivers/nvme/target/passthru.c ++++ b/drivers/nvme/target/passthru.c +@@ -226,13 +226,13 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) + req->cmd->common.opcode == nvme_admin_identify) { + switch (req->cmd->identify.cns) { + case NVME_ID_CNS_CTRL: +- nvmet_passthru_override_id_ctrl(req); ++ status = nvmet_passthru_override_id_ctrl(req); + break; + case NVME_ID_CNS_NS: +- nvmet_passthru_override_id_ns(req); ++ status = nvmet_passthru_override_id_ns(req); + break; + case NVME_ID_CNS_NS_DESC_LIST: +- nvmet_passthru_override_id_descs(req); ++ status = nvmet_passthru_override_id_descs(req); + break; + } + } else if (status < 0) +-- +2.43.0 + diff --git a/queue-6.6/platform-x86-dell-smbios-fix-wrong-token-data-in-sys.patch b/queue-6.6/platform-x86-dell-smbios-fix-wrong-token-data-in-sys.patch new file mode 100644 index 00000000000..bef29f51c94 --- /dev/null +++ b/queue-6.6/platform-x86-dell-smbios-fix-wrong-token-data-in-sys.patch @@ -0,0 +1,226 @@ +From da1532a2a5df537a5604ab4b6ecf5df63cb64339 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 22:49:02 +0200 +Subject: platform/x86: dell-smbios: Fix wrong token data in sysfs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Armin Wolf + +[ Upstream commit 1981b296f858010eae409548fd297659b2cc570e ] + +When reading token data from sysfs on my Inspiron 3505, the token +locations and values are wrong. 
This happens because match_attribute() +blindly assumes that all entries in da_tokens have an associated +entry in token_attrs. + +This however is not true as soon as da_tokens[] contains zeroed +token entries. Those entries are being skipped when initialising +token_attrs, breaking the core assumption of match_attribute(). + +Fix this by defining an extra struct for each pair of token attributes +and use container_of() to retrieve token information. + +Tested on a Dell Inspiron 3050. + +Fixes: 33b9ca1e53b4 ("platform/x86: dell-smbios: Add a sysfs interface for SMBIOS tokens") +Signed-off-by: Armin Wolf +Reviewed-by: Ilpo Järvinen +Link: https://lore.kernel.org/r/20240528204903.445546-1-W_Armin@gmx.de +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/dell/dell-smbios-base.c | 92 ++++++++------------ + 1 file changed, 36 insertions(+), 56 deletions(-) + +diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c +index e61bfaf8b5c48..86b95206cb1bd 100644 +--- a/drivers/platform/x86/dell/dell-smbios-base.c ++++ b/drivers/platform/x86/dell/dell-smbios-base.c +@@ -11,6 +11,7 @@ + */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include + #include + #include + #include +@@ -25,11 +26,16 @@ static u32 da_supported_commands; + static int da_num_tokens; + static struct platform_device *platform_device; + static struct calling_interface_token *da_tokens; +-static struct device_attribute *token_location_attrs; +-static struct device_attribute *token_value_attrs; ++static struct token_sysfs_data *token_entries; + static struct attribute **token_attrs; + static DEFINE_MUTEX(smbios_mutex); + ++struct token_sysfs_data { ++ struct device_attribute location_attr; ++ struct device_attribute value_attr; ++ struct calling_interface_token *token; ++}; ++ + struct smbios_device { + struct list_head list; + struct device *device; +@@ -416,47 +422,26 @@ static void __init 
find_tokens(const struct dmi_header *dm, void *dummy) + } + } + +-static int match_attribute(struct device *dev, +- struct device_attribute *attr) +-{ +- int i; +- +- for (i = 0; i < da_num_tokens * 2; i++) { +- if (!token_attrs[i]) +- continue; +- if (strcmp(token_attrs[i]->name, attr->attr.name) == 0) +- return i/2; +- } +- dev_dbg(dev, "couldn't match: %s\n", attr->attr.name); +- return -EINVAL; +-} +- + static ssize_t location_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- int i; ++ struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, location_attr); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +- i = match_attribute(dev, attr); +- if (i > 0) +- return sysfs_emit(buf, "%08x", da_tokens[i].location); +- return 0; ++ return sysfs_emit(buf, "%08x", data->token->location); + } + + static ssize_t value_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- int i; ++ struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, value_attr); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +- i = match_attribute(dev, attr); +- if (i > 0) +- return sysfs_emit(buf, "%08x", da_tokens[i].value); +- return 0; ++ return sysfs_emit(buf, "%08x", data->token->value); + } + + static struct attribute_group smbios_attribute_group = { +@@ -473,22 +458,15 @@ static int build_tokens_sysfs(struct platform_device *dev) + { + char *location_name; + char *value_name; +- size_t size; + int ret; + int i, j; + +- /* (number of tokens + 1 for null terminated */ +- size = sizeof(struct device_attribute) * (da_num_tokens + 1); +- token_location_attrs = kzalloc(size, GFP_KERNEL); +- if (!token_location_attrs) ++ token_entries = kcalloc(da_num_tokens, sizeof(*token_entries), GFP_KERNEL); ++ if (!token_entries) + return -ENOMEM; +- token_value_attrs = kzalloc(size, GFP_KERNEL); +- if (!token_value_attrs) +- goto out_allocate_value; + + /* need to store both location and value + terminator*/ +- size = 
sizeof(struct attribute *) * ((2 * da_num_tokens) + 1); +- token_attrs = kzalloc(size, GFP_KERNEL); ++ token_attrs = kcalloc((2 * da_num_tokens) + 1, sizeof(*token_attrs), GFP_KERNEL); + if (!token_attrs) + goto out_allocate_attrs; + +@@ -496,27 +474,32 @@ static int build_tokens_sysfs(struct platform_device *dev) + /* skip empty */ + if (da_tokens[i].tokenID == 0) + continue; ++ ++ token_entries[i].token = &da_tokens[i]; ++ + /* add location */ + location_name = kasprintf(GFP_KERNEL, "%04x_location", + da_tokens[i].tokenID); + if (location_name == NULL) + goto out_unwind_strings; +- sysfs_attr_init(&token_location_attrs[i].attr); +- token_location_attrs[i].attr.name = location_name; +- token_location_attrs[i].attr.mode = 0444; +- token_location_attrs[i].show = location_show; +- token_attrs[j++] = &token_location_attrs[i].attr; ++ ++ sysfs_attr_init(&token_entries[i].location_attr.attr); ++ token_entries[i].location_attr.attr.name = location_name; ++ token_entries[i].location_attr.attr.mode = 0444; ++ token_entries[i].location_attr.show = location_show; ++ token_attrs[j++] = &token_entries[i].location_attr.attr; + + /* add value */ + value_name = kasprintf(GFP_KERNEL, "%04x_value", + da_tokens[i].tokenID); + if (value_name == NULL) + goto loop_fail_create_value; +- sysfs_attr_init(&token_value_attrs[i].attr); +- token_value_attrs[i].attr.name = value_name; +- token_value_attrs[i].attr.mode = 0444; +- token_value_attrs[i].show = value_show; +- token_attrs[j++] = &token_value_attrs[i].attr; ++ ++ sysfs_attr_init(&token_entries[i].value_attr.attr); ++ token_entries[i].value_attr.attr.name = value_name; ++ token_entries[i].value_attr.attr.mode = 0444; ++ token_entries[i].value_attr.show = value_show; ++ token_attrs[j++] = &token_entries[i].value_attr.attr; + continue; + + loop_fail_create_value: +@@ -532,14 +515,12 @@ static int build_tokens_sysfs(struct platform_device *dev) + + out_unwind_strings: + while (i--) { +- kfree(token_location_attrs[i].attr.name); +- 
kfree(token_value_attrs[i].attr.name); ++ kfree(token_entries[i].location_attr.attr.name); ++ kfree(token_entries[i].value_attr.attr.name); + } + kfree(token_attrs); + out_allocate_attrs: +- kfree(token_value_attrs); +-out_allocate_value: +- kfree(token_location_attrs); ++ kfree(token_entries); + + return -ENOMEM; + } +@@ -551,12 +532,11 @@ static void free_group(struct platform_device *pdev) + sysfs_remove_group(&pdev->dev.kobj, + &smbios_attribute_group); + for (i = 0; i < da_num_tokens; i++) { +- kfree(token_location_attrs[i].attr.name); +- kfree(token_value_attrs[i].attr.name); ++ kfree(token_entries[i].location_attr.attr.name); ++ kfree(token_entries[i].value_attr.attr.name); + } + kfree(token_attrs); +- kfree(token_value_attrs); +- kfree(token_location_attrs); ++ kfree(token_entries); + } + + static int __init dell_smbios_init(void) +-- +2.43.0 + diff --git a/queue-6.6/scsi-ufs-core-quiesce-request-queues-before-checking.patch b/queue-6.6/scsi-ufs-core-quiesce-request-queues-before-checking.patch new file mode 100644 index 00000000000..d7c4eb00be6 --- /dev/null +++ b/queue-6.6/scsi-ufs-core-quiesce-request-queues-before-checking.patch @@ -0,0 +1,109 @@ +From 2eec26eb4182387aec8a37e4bd066e5e7c1a9619 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jun 2024 18:06:23 +0800 +Subject: scsi: ufs: core: Quiesce request queues before checking pending cmds + +From: Ziqi Chen + +[ Upstream commit 77691af484e28af7a692e511b9ed5ca63012ec6e ] + +In ufshcd_clock_scaling_prepare(), after SCSI layer is blocked, +ufshcd_pending_cmds() is called to check whether there are pending +transactions or not. And only if there are no pending transactions can we +proceed to kickstart the clock scaling sequence. + +ufshcd_pending_cmds() traverses over all SCSI devices and calls +sbitmap_weight() on their budget_map. sbitmap_weight() can be broken down +to three steps: + + 1. Calculate the nr outstanding bits set in the 'word' bitmap. + + 2. 
Calculate the nr outstanding bits set in the 'cleared' bitmap. + + 3. Subtract the result from step 1 by the result from step 2. + +This can lead to a race condition as outlined below: + +Assume there is one pending transaction in the request queue of one SCSI +device, say sda, and the budget token of this request is 0, the 'word' is +0x1 and the 'cleared' is 0x0. + + 1. When step 1 executes, it gets the result as 1. + + 2. Before step 2 executes, block layer tries to dispatch a new request to + sda. Since the SCSI layer is blocked, the request cannot pass through + SCSI but the block layer would do budget_get() and budget_put() to + sda's budget map regardless, so the 'word' has become 0x3 and 'cleared' + has become 0x2 (assume the new request got budget token 1). + + 3. When step 2 executes, it gets the result as 1. + + 4. When step 3 executes, it gets the result as 0, meaning there is no + pending transactions, which is wrong. + + Thread A Thread B + ufshcd_pending_cmds() __blk_mq_sched_dispatch_requests() + | | + sbitmap_weight(word) | + | scsi_mq_get_budget() + | | + | scsi_mq_put_budget() + | | + sbitmap_weight(cleared) + ... + +When this race condition happens, the clock scaling sequence is started +with transactions still in flight, leading to subsequent hibernate enter +failure, broken link, task abort and back to back error recovery. + +Fix this race condition by quiescing the request queues before calling +ufshcd_pending_cmds() so that block layer won't touch the budget map when +ufshcd_pending_cmds() is working on it. In addition, remove the SCSI layer +blocking/unblocking to reduce redundancies and latencies. + +Fixes: 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code") +Co-developed-by: Can Guo +Signed-off-by: Can Guo +Signed-off-by: Ziqi Chen +Link: https://lore.kernel.org/r/1717754818-39863-1-git-send-email-quic_ziqichen@quicinc.com +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufshcd.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 589c90f4d4021..40689757a2690 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -1267,7 +1267,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) + * make sure that there are no outstanding requests when + * clock scaling is in progress + */ +- ufshcd_scsi_block_requests(hba); ++ blk_mq_quiesce_tagset(&hba->host->tag_set); + mutex_lock(&hba->wb_mutex); + down_write(&hba->clk_scaling_lock); + +@@ -1276,7 +1276,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) + ret = -EBUSY; + up_write(&hba->clk_scaling_lock); + mutex_unlock(&hba->wb_mutex); +- ufshcd_scsi_unblock_requests(hba); ++ blk_mq_unquiesce_tagset(&hba->host->tag_set); + goto out; + } + +@@ -1297,7 +1297,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc + + mutex_unlock(&hba->wb_mutex); + +- ufshcd_scsi_unblock_requests(hba); ++ blk_mq_unquiesce_tagset(&hba->host->tag_set); + ufshcd_release(hba); + } + +-- +2.43.0 + diff --git a/queue-6.6/selftests-ftrace-fix-to-check-required-event-file.patch b/queue-6.6/selftests-ftrace-fix-to-check-required-event-file.patch new file mode 100644 index 00000000000..b1e7ff53c62 --- /dev/null +++ b/queue-6.6/selftests-ftrace-fix-to-check-required-event-file.patch @@ -0,0 +1,40 @@ +From a8fb477b3fc256c404054ff45207996c128412da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 09:00:22 +0900 +Subject: selftests/ftrace: Fix to check required event file + +From: Masami Hiramatsu (Google) + +[ Upstream commit f6c3c83db1d939ebdb8c8922748ae647d8126d91 ] + +The dynevent/test_duplicates.tc test case uses `syscalls/sys_enter_openat` +event for defining eprobe on it. 
Since these `syscalls` events depend on +CONFIG_FTRACE_SYSCALLS=y, if it is not set, the test will fail. + +Add the event file to `required` line so that the test will return +`unsupported` result. + +Fixes: 297e1dcdca3d ("selftests/ftrace: Add selftest for testing duplicate eprobes and kprobes") +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Shuah Khan +Signed-off-by: Sasha Levin +--- + .../testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc +index d3a79da215c8b..5f72abe6fa79b 100644 +--- a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc ++++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc +@@ -1,7 +1,7 @@ + #!/bin/sh + # SPDX-License-Identifier: GPL-2.0 + # description: Generic dynamic event - check if duplicate events are caught +-# requires: dynamic_events "e[:[/][]] . []":README ++# requires: dynamic_events "e[:[/][]] . 
[]":README events/syscalls/sys_enter_openat + + echo 0 > events/enable + +-- +2.43.0 + diff --git a/queue-6.6/selftests-tracing-fix-event-filter-test-to-retry-up-.patch b/queue-6.6/selftests-tracing-fix-event-filter-test-to-retry-up-.patch new file mode 100644 index 00000000000..cc65a3a96ff --- /dev/null +++ b/queue-6.6/selftests-tracing-fix-event-filter-test-to-retry-up-.patch @@ -0,0 +1,79 @@ +From 5b24ac98dad1768293601187e5bc48da55de6a43 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 31 May 2024 18:43:37 +0900 +Subject: selftests/tracing: Fix event filter test to retry up to 10 times + +From: Masami Hiramatsu (Google) + +[ Upstream commit 0f42bdf59b4e428485aa922bef871bfa6cc505e0 ] + +Commit eb50d0f250e9 ("selftests/ftrace: Choose target function for filter +test from samples") chose the target function from samples, but sometimes +this test fails randomly because the target function does not hit at the +next time. So retry getting samples up to 10 times. + +Fixes: eb50d0f250e9 ("selftests/ftrace: Choose target function for filter test from samples") +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Shuah Khan +Signed-off-by: Sasha Levin +--- + .../test.d/filter/event-filter-function.tc | 20 ++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +index 3f74c09c56b62..118247b8dd84d 100644 +--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc ++++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +@@ -10,7 +10,6 @@ fail() { #msg + } + + sample_events() { +- echo > trace + echo 1 > events/kmem/kmem_cache_free/enable + echo 1 > tracing_on + ls > /dev/null +@@ -22,6 +21,7 @@ echo 0 > tracing_on + echo 0 > events/enable + + echo "Get the most frequently calling function" ++echo > trace + sample_events + + target_func=`cat trace | grep 
-o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +@@ -32,7 +32,16 @@ echo > trace + + echo "Test event filter function name" + echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter ++ ++sample_events ++max_retry=10 ++while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do + sample_events ++max_retry=$((max_retry - 1)) ++if [ $max_retry -eq 0 ]; then ++ exit_fail ++fi ++done + + hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` + misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` +@@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1` + + echo "Test event filter function address" + echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter ++echo > trace ++sample_events ++max_retry=10 ++while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do + sample_events ++max_retry=$((max_retry - 1)) ++if [ $max_retry -eq 0 ]; then ++ exit_fail ++fi ++done + + hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` + misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index 65acd1ae83e..f09292ca9b4 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -100,3 +100,82 @@ scsi-mpt3sas-avoid-test-set_bit-operating-in-non-allocated-memory.patch scsi-sd-use-read-16-when-reading-block-zero-on-large-capacity-disks.patch gve-clear-napi-skb-before-dev_kfree_skb_any.patch powerpc-uaccess-fix-build-errors-seen-with-gcc-13-14.patch +hid-nvidia-shield-add-missing-check-for-input_ff_cre.patch +cxl-test-add-missing-vmalloc.h-for-tools-testing-cxl.patch +cxl-region-fix-memregion-leaks-in-devm_cxl_add_regio.patch +cachefiles-add-output-string-to-cachefiles_obj_-get-.patch +cachefiles-remove-requests-from-xarray-during-flushi.patch +cachefiles-introduce-object-ondemand-state.patch +cachefiles-extract-ondemand-info-field-from-cachefil.patch 
+cachefiles-resend-an-open-request-if-the-read-reques.patch +cachefiles-add-spin_lock-for-cachefiles_ondemand_inf.patch +cachefiles-add-restore-command-to-recover-inflight-o.patch +cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch +cachefiles-fix-slab-use-after-free-in-cachefiles_ond.patch-25104 +cachefiles-remove-err_put_fd-label-in-cachefiles_ond.patch +cachefiles-never-get-a-new-anonymous-fd-if-ondemand_.patch +cachefiles-defer-exposing-anon_fd-until-after-copy_t.patch +cachefiles-flush-all-requests-after-setting-cachefil.patch +selftests-ftrace-fix-to-check-required-event-file.patch +clk-sifive-do-not-register-clkdevs-for-prci-clocks.patch +nfsv4.1-enforce-rootpath-check-in-fs_location-query.patch +sunrpc-return-proper-error-from-gss_wrap_req_priv.patch +nfs-add-barriers-when-testing-for-nfs_fsdata_blocked.patch +selftests-tracing-fix-event-filter-test-to-retry-up-.patch +nvme-fix-nvme_pr_-status-code-parsing.patch +drm-panel-sitronix-st7789v-add-check-for-of_drm_get_.patch +platform-x86-dell-smbios-fix-wrong-token-data-in-sys.patch +gpio-tqmx86-fix-typo-in-kconfig-label.patch +gpio-tqmx86-introduce-shadow-register-for-gpio-outpu.patch +gpio-tqmx86-store-irq-trigger-type-and-unmask-status.patch +gpio-tqmx86-fix-broken-irq_type_edge_both-interrupt-.patch +hid-core-remove-unnecessary-warn_on-in-implement.patch +iommu-amd-fix-sysfs-leak-in-iommu-init.patch +iommu-return-right-value-in-iommu_sva_bind_device.patch +io_uring-io-wq-use-set_bit-and-test_bit-at-worker-fl.patch +io_uring-io-wq-avoid-garbage-value-of-match-in-io_wq.patch +hid-logitech-dj-fix-memory-leak-in-logi_dj_recv_swit.patch +drm-vmwgfx-refactor-drm-connector-probing-for-displa.patch +drm-vmwgfx-filter-modes-which-exceed-graphics-memory.patch +drm-vmwgfx-3d-disabled-should-not-effect-stdu-memory.patch +drm-vmwgfx-remove-stdu-logic-from-generic-mode_valid.patch +drm-vmwgfx-don-t-memcmp-equivalent-pointers.patch +af_unix-return-struct-unix_sock-from-unix_get_socket.patch 
+af_unix-run-gc-on-only-one-cpu.patch +af_unix-try-to-run-gc-async.patch +af_unix-replace-bug_on-with-warn_on_once.patch +af_unix-save-listener-for-embryo-socket.patch +net-change-proto-and-proto_ops-accept-type.patch +af_unix-annotate-data-race-of-sk-sk_state-in-unix_ac.patch +modpost-do-not-warn-about-missing-module_description.patch +net-sfp-always-call-sfp_sm_mod_remove-on-remove.patch +net-hns3-fix-kernel-crash-problem-in-concurrent-scen.patch +net-hns3-add-cond_resched-to-hns3-ring-buffer-init-p.patch +liquidio-adjust-a-null-pointer-handling-path-in-lio_.patch +net-stmmac-dwmac-qcom-ethqos-configure-host-dma-widt.patch +drm-komeda-check-for-error-valued-pointer.patch +drm-bridge-panel-fix-runtime-warning-on-panel-bridge.patch +tcp-fix-race-in-tcp_v6_syn_recv_sock.patch +net-dsa-qca8k-fix-usages-of-device_get_named_child_n.patch +geneve-fix-incorrect-inner-network-header-offset-whe.patch +net-mlx5e-fix-features-validation-check-for-tunneled.patch +bluetooth-l2cap-fix-rejecting-l2cap_conn_param_updat.patch +bluetooth-fix-connection-setup-in-l2cap_connect.patch +netfilter-nft_inner-validate-mandatory-meta-and-payl.patch +netfilter-ipset-fix-race-between-namespace-cleanup-a.patch +netfilter-use-flowlabel-flow-key-when-re-routing-man.patch +x86-asm-use-c-n-instead-of-p-operand-modifier-in-asm.patch +x86-uaccess-fix-missed-zeroing-of-ia32-u64-get_user-.patch +scsi-ufs-core-quiesce-request-queues-before-checking.patch +net-pse-pd-use-eopnotsupp-error-code-instead-of-enot.patch +gve-ignore-nonrelevant-gso-type-bits-when-processing.patch +net-stmmac-replace-priv-speed-with-the-porttransmitr.patch +block-sed-opal-avoid-possible-wrong-address-referenc.patch +block-fix-request.queuelist-usage-in-flush.patch +nvmet-passthru-propagate-status-from-id-override-fun.patch +net-ipv6-fix-the-rt-cache-flush-via-sysctl-using-a-p.patch +net-bridge-mst-pass-vlan-group-directly-to-br_mst_vl.patch +net-bridge-mst-fix-suspicious-rcu-usage-in-br_mst_se.patch 
+ionic-fix-use-after-netif_napi_del.patch +af_unix-read-with-msg_peek-loops-if-the-first-unread.patch +bnxt_en-adjust-logging-of-firmware-messages-in-case-.patch diff --git a/queue-6.6/sunrpc-return-proper-error-from-gss_wrap_req_priv.patch b/queue-6.6/sunrpc-return-proper-error-from-gss_wrap_req_priv.patch new file mode 100644 index 00000000000..e581517af00 --- /dev/null +++ b/queue-6.6/sunrpc-return-proper-error-from-gss_wrap_req_priv.patch @@ -0,0 +1,40 @@ +From 8efeb825437b0575845c62e7cbba24de131e8e79 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 16:47:16 +0800 +Subject: SUNRPC: return proper error from gss_wrap_req_priv + +From: Chen Hanxiao + +[ Upstream commit 33c94d7e3cb84f6d130678d6d59ba475a6c489cf ] + +don't return 0 if snd_buf->len really greater than snd_buf->buflen + +Signed-off-by: Chen Hanxiao +Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko") +Reviewed-by: Benjamin Coddington +Reviewed-by: Chuck Lever +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/auth_gss/auth_gss.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c +index 1af71fbb0d805..00753bc5f1b14 100644 +--- a/net/sunrpc/auth_gss/auth_gss.c ++++ b/net/sunrpc/auth_gss/auth_gss.c +@@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); + /* slack space should prevent this ever happening: */ +- if (unlikely(snd_buf->len > snd_buf->buflen)) ++ if (unlikely(snd_buf->len > snd_buf->buflen)) { ++ status = -EIO; + goto wrap_failed; ++ } + /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was + * done anyway, so it's safe to put the request on the wire: */ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +-- +2.43.0 + diff --git a/queue-6.6/tcp-fix-race-in-tcp_v6_syn_recv_sock.patch 
b/queue-6.6/tcp-fix-race-in-tcp_v6_syn_recv_sock.patch new file mode 100644 index 00000000000..06ad6fccdbc --- /dev/null +++ b/queue-6.6/tcp-fix-race-in-tcp_v6_syn_recv_sock.patch @@ -0,0 +1,54 @@ +From 4a61f51b1c0b103eed4521b4aaa633e3ca8e1264 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Jun 2024 15:46:51 +0000 +Subject: tcp: fix race in tcp_v6_syn_recv_sock() + +From: Eric Dumazet + +[ Upstream commit d37fe4255abe8e7b419b90c5847e8ec2b8debb08 ] + +tcp_v6_syn_recv_sock() calls ip6_dst_store() before +inet_sk(newsk)->pinet6 has been set up. + +This means ip6_dst_store() writes over the parent (listener) +np->dst_cookie. + +This is racy because multiple threads could share the same +parent and their final np->dst_cookie could be wrong. + +Move ip6_dst_store() call after inet_sk(newsk)->pinet6 +has been changed and after the copy of parent ipv6_pinfo. + +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Signed-off-by: Eric Dumazet +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/tcp_ipv6.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 3783334ef2332..07bcb690932e1 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1287,7 +1287,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + */ + + newsk->sk_gso_type = SKB_GSO_TCPV6; +- ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); + + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); +@@ -1298,6 +1297,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ++ ip6_dst_store(newsk, dst, NULL, NULL); ++ + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; +-- +2.43.0 + diff --git a/queue-6.6/x86-asm-use-c-n-instead-of-p-operand-modifier-in-asm.patch b/queue-6.6/x86-asm-use-c-n-instead-of-p-operand-modifier-in-asm.patch new file mode 100644 index 00000000000..403ab614f4f --- /dev/null +++ b/queue-6.6/x86-asm-use-c-n-instead-of-p-operand-modifier-in-asm.patch @@ -0,0 +1,163 @@ +From 07c9fdea0855a9f618e71246d17449111360fede Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Mar 2024 11:40:13 +0100 +Subject: x86/asm: Use %c/%n instead of %P operand modifier in asm templates + +From: Uros Bizjak + +[ Upstream commit 41cd2e1ee96e56401a18dbce6f42f0bdaebcbf3b ] + +The "P" asm operand modifier is a x86 target-specific modifier. + +When used with a constant, the "P" modifier emits "cst" instead of +"$cst". This property is currently used to emit the bare constant +without all syntax-specific prefixes. + +The generic "c" resp. "n" operand modifier should be used instead. + +No functional changes intended. + +Signed-off-by: Uros Bizjak +Signed-off-by: Ingo Molnar +Cc: Linus Torvalds +Cc: Josh Poimboeuf +Cc: Ard Biesheuvel +Cc: "H. 
Peter Anvin" +Link: https://lore.kernel.org/r/20240319104418.284519-3-ubizjak@gmail.com +Stable-dep-of: 8c860ed825cb ("x86/uaccess: Fix missed zeroing of ia32 u64 get_user() range checking") +Signed-off-by: Sasha Levin +--- + arch/x86/boot/main.c | 4 ++-- + arch/x86/include/asm/alternative.h | 22 +++++++++++----------- + arch/x86/include/asm/atomic64_32.h | 2 +- + arch/x86/include/asm/cpufeature.h | 2 +- + arch/x86/include/asm/irq_stack.h | 2 +- + arch/x86/include/asm/uaccess.h | 4 ++-- + 6 files changed, 18 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c +index c4ea5258ab558..9049f390d8347 100644 +--- a/arch/x86/boot/main.c ++++ b/arch/x86/boot/main.c +@@ -119,8 +119,8 @@ static void init_heap(void) + char *stack_end; + + if (boot_params.hdr.loadflags & CAN_USE_HEAP) { +- asm("leal %P1(%%esp),%0" +- : "=r" (stack_end) : "i" (-STACK_SIZE)); ++ asm("leal %n1(%%esp),%0" ++ : "=r" (stack_end) : "i" (STACK_SIZE)); + + heap_end = (char *) + ((size_t)boot_params.hdr.heap_end_ptr + 0x200); +diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +index 65f79092c9d9e..cb9ce0f9e78e0 100644 +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -288,10 +288,10 @@ static inline int alternatives_text_reserved(void *start, void *end) + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. + */ +-#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ +- ft_flags2, input...) \ +- asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ +- newinstr2, ft_flags2) \ ++#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ ++ ft_flags2, input...) 
\ ++ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ ++ newinstr2, ft_flags2) \ + : : "i" (0), ## input) + + /* Like alternative_input, but with a single output argument */ +@@ -301,7 +301,7 @@ static inline int alternatives_text_reserved(void *start, void *end) + + /* Like alternative_io, but for replacing a direct call with another one. */ + #define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ +- asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \ ++ asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + + /* +@@ -310,12 +310,12 @@ static inline int alternatives_text_reserved(void *start, void *end) + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ +- output, input...) \ +- asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\ +- "call %P[new2]", ft_flags2) \ +- : output, ASM_CALL_CONSTRAINT \ +- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ ++#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ ++ output, input...) \ ++ asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ ++ "call %c[new2]", ft_flags2) \ ++ : output, ASM_CALL_CONSTRAINT \ ++ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input) + + /* +diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h +index 3486d91b8595f..d510405e4e1de 100644 +--- a/arch/x86/include/asm/atomic64_32.h ++++ b/arch/x86/include/asm/atomic64_32.h +@@ -24,7 +24,7 @@ typedef struct { + + #ifdef CONFIG_X86_CMPXCHG64 + #define __alternative_atomic64(f, g, out, in...) 
\ +- asm volatile("call %P[func]" \ ++ asm volatile("call %c[func]" \ + : out : [func] "i" (atomic64_##g##_cx8), ## in) + + #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index 686e92d2663ee..3508f3fc928d4 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); + static __always_inline bool _static_cpu_has(u16 bit) + { + asm goto( +- ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ++ ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + ".pushsection .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" +diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h +index 798183867d789..b71ad173f8776 100644 +--- a/arch/x86/include/asm/irq_stack.h ++++ b/arch/x86/include/asm/irq_stack.h +@@ -100,7 +100,7 @@ + } + + #define ASM_CALL_ARG0 \ +- "call %P[__func] \n" \ ++ "call %c[__func] \n" \ + ASM_REACHABLE + + #define ASM_CALL_ARG1 \ +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index 237dc8cdd12b9..0f9bab92a43d7 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -78,7 +78,7 @@ extern int __get_user_bad(void); + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ +- asm volatile("call __" #fn "_%P4" \ ++ asm volatile("call __" #fn "_%c4" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ + : "0" (ptr), "i" (sizeof(*(ptr)))); \ +@@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void); + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ +- asm volatile("call __" #fn "_%P[size]" \ ++ asm volatile("call __" #fn "_%c[size]" \ + : "=c" (__ret_pu), \ + ASM_CALL_CONSTRAINT \ + : "0" (__ptr_pu), \ +-- +2.43.0 + diff --git 
a/queue-6.6/x86-uaccess-fix-missed-zeroing-of-ia32-u64-get_user-.patch b/queue-6.6/x86-uaccess-fix-missed-zeroing-of-ia32-u64-get_user-.patch new file mode 100644 index 00000000000..9a9e49bafc3 --- /dev/null +++ b/queue-6.6/x86-uaccess-fix-missed-zeroing-of-ia32-u64-get_user-.patch @@ -0,0 +1,85 @@ +From 7ab098cc736d580bb6cbc817254078874bced3a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jun 2024 14:02:27 -0700 +Subject: x86/uaccess: Fix missed zeroing of ia32 u64 get_user() range checking + +From: Kees Cook + +[ Upstream commit 8c860ed825cb85f6672cd7b10a8f33e3498a7c81 ] + +When reworking the range checking for get_user(), the get_user_8() case +on 32-bit wasn't zeroing the high register. (The jump to bad_get_user_8 +was accidentally dropped.) Restore the correct error handling +destination (and rename the jump to using the expected ".L" prefix). + +While here, switch to using a named argument ("size") for the call +template ("%c4" to "%c[size]") as already used in the other call +templates in this file. + +Found after moving the usercopy selftests to KUnit: + + # usercopy_test_invalid: EXPECTATION FAILED at + lib/usercopy_kunit.c:278 + Expected val_u64 == 0, but + val_u64 == -60129542144 (0xfffffff200000000) + +Closes: https://lore.kernel.org/all/CABVgOSn=tb=Lj9SxHuT4_9MTjjKVxsq-ikdXC4kGHO4CfKVmGQ@mail.gmail.com +Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()") +Reported-by: David Gow +Signed-off-by: Kees Cook +Signed-off-by: Dave Hansen +Reviewed-by: Kirill A. 
Shutemov +Reviewed-by: Qiuxu Zhuo +Tested-by: David Gow +Link: https://lore.kernel.org/all/20240610210213.work.143-kees%40kernel.org +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/uaccess.h | 4 ++-- + arch/x86/lib/getuser.S | 6 +++++- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index 0f9bab92a43d7..3a7755c1a4410 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -78,10 +78,10 @@ extern int __get_user_bad(void); + int __ret_gu; \ + register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ + __chk_user_ptr(ptr); \ +- asm volatile("call __" #fn "_%c4" \ ++ asm volatile("call __" #fn "_%c[size]" \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ +- : "0" (ptr), "i" (sizeof(*(ptr)))); \ ++ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ + (x) = (__force __typeof__(*(ptr))) __val_gu; \ + __builtin_expect(__ret_gu, 0); \ +diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S +index f6aad480febd3..6913fbce6544f 100644 +--- a/arch/x86/lib/getuser.S ++++ b/arch/x86/lib/getuser.S +@@ -44,7 +44,11 @@ + or %rdx, %rax + .else + cmp $TASK_SIZE_MAX-\size+1, %eax ++.if \size != 8 + jae .Lbad_get_user ++.else ++ jae .Lbad_get_user_8 ++.endif + sbb %edx, %edx /* array_index_mask_nospec() */ + and %edx, %eax + .endif +@@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception) + #ifdef CONFIG_X86_32 + SYM_CODE_START_LOCAL(__get_user_8_handle_exception) + ASM_CLAC +-bad_get_user_8: ++.Lbad_get_user_8: + xor %edx,%edx + xor %ecx,%ecx + mov $(-EFAULT),%_ASM_AX +-- +2.43.0 +