--- /dev/null
+From 49ecaf4e697d3517a0b6e0fb4ea6f2cd140f2fb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:28 -0700
+Subject: af_unix: Annotate data-races around sk->sk_state for writers.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 942238f9735a4a4ebf8274b218d9a910158941d1 ]
+
+sk->sk_state is changed under unix_state_lock(), but it's read locklessly
+in many places.
+
+This patch adds WRITE_ONCE() on the writer side.
+
+We will add READ_ONCE() to the lockless readers in the following patches.
+
+Fixes: 83301b5367a9 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index c0cf7137979c7..0a9c3975d4303 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -616,7 +616,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
+ u->path.dentry = NULL;
+ u->path.mnt = NULL;
+ state = sk->sk_state;
+- sk->sk_state = TCP_CLOSE;
++ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
+
+ skpair = unix_peer(sk);
+ unix_peer(sk) = NULL;
+@@ -738,7 +738,8 @@ static int unix_listen(struct socket *sock, int backlog)
+ if (backlog > sk->sk_max_ack_backlog)
+ wake_up_interruptible_all(&u->peer_wait);
+ sk->sk_max_ack_backlog = backlog;
+- sk->sk_state = TCP_LISTEN;
++ WRITE_ONCE(sk->sk_state, TCP_LISTEN);
++
+ /* set credentials so connect can copy them */
+ init_peercred(sk);
+ err = 0;
+@@ -1401,7 +1402,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
+ if (err)
+ goto out_unlock;
+
+- sk->sk_state = other->sk_state = TCP_ESTABLISHED;
++ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
++ WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
+ } else {
+ /*
+ * 1003.1g breaking connected state with AF_UNSPEC
+@@ -1418,7 +1420,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
+
+ unix_peer(sk) = other;
+ if (!other)
+- sk->sk_state = TCP_CLOSE;
++ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
+ unix_state_double_unlock(sk, other);
+@@ -1638,7 +1640,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ copy_peercred(sk, other);
+
+ sock->state = SS_CONNECTED;
+- sk->sk_state = TCP_ESTABLISHED;
++ WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
+ sock_hold(newsk);
+
+ smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
+@@ -2097,7 +2099,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+ unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
+- sk->sk_state = TCP_CLOSE;
++ WRITE_ONCE(sk->sk_state, TCP_CLOSE);
+ unix_state_unlock(sk);
+
+ unix_dgram_disconnected(sk, other);
+--
+2.43.0
+
--- /dev/null
+From fa334c18493eaa1b861ea362d42c6c47b012164f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:37 -0700
+Subject: af_unix: Annotate data-race of net->unx.sysctl_max_dgram_qlen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit bd9f2d05731f6a112d0c7391a0d537bfc588dbe6 ]
+
+net->unx.sysctl_max_dgram_qlen is exposed as a sysctl knob and can be
+changed concurrently.
+
+Let's use READ_ONCE() in unix_create1().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 84bc1de2fd967..0c217ac17e053 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -976,7 +976,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
+ sk->sk_hash = unix_unbound_hash(sk);
+ sk->sk_allocation = GFP_KERNEL_ACCOUNT;
+ sk->sk_write_space = unix_write_space;
+- sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
++ sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
+ sk->sk_destruct = unix_sock_destructor;
+ u = unix_sk(sk);
+ u->inflight = 0;
+--
+2.43.0
+
--- /dev/null
+From 86ca303fca7ea63419e54c0d8d06d14807bad81e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:41 -0700
+Subject: af_unix: Annotate data-race of sk->sk_shutdown in sk_diag_fill().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit efaf24e30ec39ebbea9112227485805a48b0ceb1 ]
+
+While dumping sockets via UNIX_DIAG, we do not hold unix_state_lock().
+
+Let's use READ_ONCE() to read sk->sk_shutdown.
+
+Fixes: e4e541a84863 ("sock-diag: Report shutdown for inet and unix sockets (v2)")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/diag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/diag.c b/net/unix/diag.c
+index 321336f91a0af..937edf4afed41 100644
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
+ sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+- if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
++ if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown)))
+ goto out_nlmsg_trim;
+
+ if ((req->udiag_show & UDIAG_SHOW_UID) &&
+--
+2.43.0
+
--- /dev/null
+From a85c82051706b12984d0aedf0dfcf532a4c5f732 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:29 -0700
+Subject: af_unix: Annotate data-race of sk->sk_state in unix_inq_len().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 3a0f38eb285c8c2eead4b3230c7ac2983707599d ]
+
+ioctl(SIOCINQ) calls unix_inq_len() that checks sk->sk_state first
+and returns -EINVAL if it's TCP_LISTEN.
+
+Then, for SOCK_STREAM sockets, unix_inq_len() returns the number of
+bytes in recvq.
+
+However, unix_inq_len() does not hold unix_state_lock(), and the
+concurrent listen() might change the state after checking sk->sk_state.
+
+If the race occurs, 0 is returned for the listener, instead of -EINVAL,
+because the length of skb with embryo is 0.
+
+We could hold unix_state_lock() in unix_inq_len(), but it's overkill
+given the result is true for pre-listen() TCP_CLOSE state.
+
+So, let's use READ_ONCE() for sk->sk_state in unix_inq_len().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 0a9c3975d4303..989c2c76dce66 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -3064,7 +3064,7 @@ long unix_inq_len(struct sock *sk)
+ struct sk_buff *skb;
+ long amount = 0;
+
+- if (sk->sk_state == TCP_LISTEN)
++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
+ return -EINVAL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+--
+2.43.0
+
--- /dev/null
+From 3dc49cfa98ad3704078850e4b019d5c284cc6199 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:31 -0700
+Subject: af_unix: Annotate data-race of sk->sk_state in unix_stream_connect().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit a9bf9c7dc6a5899c01cb8f6e773a66315a5cd4b7 ]
+
+As small optimisation, unix_stream_connect() prefetches the client's
+sk->sk_state without unix_state_lock() and checks if it's TCP_CLOSE.
+
+Later, sk->sk_state is checked again under unix_state_lock().
+
+Let's use READ_ONCE() for the first check and TCP_CLOSE directly for
+the second check.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index a3eb241eb064e..f95aba56425fa 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1481,7 +1481,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ struct sk_buff *skb = NULL;
+ long timeo;
+ int err;
+- int st;
+
+ err = unix_validate_addr(sunaddr, addr_len);
+ if (err)
+@@ -1571,9 +1570,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+
+ Well, and we have to recheck the state after socket locked.
+ */
+- st = sk->sk_state;
+-
+- switch (st) {
++ switch (READ_ONCE(sk->sk_state)) {
+ case TCP_CLOSE:
+ /* This is ok... continue with connect */
+ break;
+@@ -1588,7 +1585,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
+
+- if (sk->sk_state != st) {
++ if (sk->sk_state != TCP_CLOSE) {
+ unix_state_unlock(sk);
+ unix_state_unlock(other);
+ sock_put(other);
+--
+2.43.0
+
--- /dev/null
+From e635b3501f721e88a7722695e49bd4260acb5d4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:34 -0700
+Subject: af_unix: Annotate data-race of sk->sk_state in
+ unix_stream_read_skb().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit af4c733b6b1aded4dc808fafece7dfe6e9d2ebb3 ]
+
+unix_stream_read_skb() is called from sk->sk_data_ready() context
+where unix_state_lock() is not held.
+
+Let's use READ_ONCE() there.
+
+Fixes: 77462de14a43 ("af_unix: Add read_sock for stream socket types")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index d92e664032121..9f266a7679cbc 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2706,7 +2706,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+
+ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+ {
+- if (unlikely(sk->sk_state != TCP_ESTABLISHED))
++ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
+ return -ENOTCONN;
+
+ return unix_read_skb(sk, recv_actor);
+--
+2.43.0
+
--- /dev/null
+From b0197ba851a3df3627b336181494acff9065bb19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:36 -0700
+Subject: af_unix: Annotate data-races around sk->sk_sndbuf.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit b0632e53e0da8054e36bc973f0eec69d30f1b7c6 ]
+
+sk_setsockopt() changes sk->sk_sndbuf under lock_sock(), but it's
+not used in af_unix.c.
+
+Let's use READ_ONCE() to read sk->sk_sndbuf in unix_writable(),
+unix_dgram_sendmsg(), and unix_stream_sendmsg().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 9f266a7679cbc..84bc1de2fd967 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -533,7 +533,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+ static int unix_writable(const struct sock *sk, unsigned char state)
+ {
+ return state != TCP_LISTEN &&
+- (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
++ (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
+ }
+
+ static void unix_write_space(struct sock *sk)
+@@ -2014,7 +2014,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+ }
+
+ err = -EMSGSIZE;
+- if (len > sk->sk_sndbuf - 32)
++ if (len > READ_ONCE(sk->sk_sndbuf) - 32)
+ goto out;
+
+ if (len > SKB_MAX_ALLOC) {
+@@ -2294,7 +2294,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ &err, 0);
+ } else {
+ /* Keep two messages in the pipe so it schedules better */
+- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
++ size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
+
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+--
+2.43.0
+
--- /dev/null
+From ed6ce48cf979b49ab1108e0d84c960efacdad476 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:33 -0700
+Subject: af_unix: Annotate data-races around sk->sk_state in sendmsg() and
+ recvmsg().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8a34d4e8d9742a24f74998f45a6a98edd923319b ]
+
+The following functions read sk->sk_state locklessly and proceed only if
+the state is TCP_ESTABLISHED.
+
+ * unix_stream_sendmsg
+ * unix_stream_read_generic
+ * unix_seqpacket_sendmsg
+ * unix_seqpacket_recvmsg
+
+Let's use READ_ONCE() there.
+
+Fixes: a05d2ad1c1f3 ("af_unix: Only allow recv on connected seqpacket sockets.")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index f95aba56425fa..d92e664032121 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2273,7 +2273,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+ }
+
+ if (msg->msg_namelen) {
+- err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
++ err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
+ goto out_err;
+ } else {
+ err = -ENOTCONN;
+@@ -2387,7 +2387,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
+ if (err)
+ return err;
+
+- if (sk->sk_state != TCP_ESTABLISHED)
++ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
+ return -ENOTCONN;
+
+ if (msg->msg_namelen)
+@@ -2401,7 +2401,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
+ {
+ struct sock *sk = sock->sk;
+
+- if (sk->sk_state != TCP_ESTABLISHED)
++ if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
+ return -ENOTCONN;
+
+ return unix_dgram_recvmsg(sock, msg, size, flags);
+@@ -2730,7 +2730,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
+ size_t size = state->size;
+ unsigned int last_len;
+
+- if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
++ if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
+ err = -EINVAL;
+ goto out;
+ }
+--
+2.43.0
+
--- /dev/null
+From b140402b6bd6105319b4023a543ea4227f98e817 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:30 -0700
+Subject: af_unix: Annotate data-races around sk->sk_state in
+ unix_write_space() and poll().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit eb0718fb3e97ad0d6f4529b810103451c90adf94 ]
+
+unix_poll() and unix_dgram_poll() read sk->sk_state locklessly and
+calls unix_writable() which also reads sk->sk_state without holding
+unix_state_lock().
+
+Let's use READ_ONCE() in unix_poll() and unix_dgram_poll() and pass
+it to unix_writable().
+
+While at it, we remove TCP_SYN_SENT check in unix_dgram_poll() as
+that state does not exist for AF_UNIX socket since the code was added.
+
+Fixes: 1586a5877db9 ("af_unix: do not report POLLOUT on listeners")
+Fixes: 3c73419c09a5 ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 989c2c76dce66..a3eb241eb064e 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -530,9 +530,9 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+ return 0;
+ }
+
+-static int unix_writable(const struct sock *sk)
++static int unix_writable(const struct sock *sk, unsigned char state)
+ {
+- return sk->sk_state != TCP_LISTEN &&
++ return state != TCP_LISTEN &&
+ (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+ }
+
+@@ -541,7 +541,7 @@ static void unix_write_space(struct sock *sk)
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+- if (unix_writable(sk)) {
++ if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
+ wq = rcu_dereference(sk->sk_wq);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait,
+@@ -3176,12 +3176,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
+ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+ {
+ struct sock *sk = sock->sk;
++ unsigned char state;
+ __poll_t mask;
+ u8 shutdown;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
++ state = READ_ONCE(sk->sk_state);
+
+ /* exceptional events? */
+ if (READ_ONCE(sk->sk_err))
+@@ -3203,14 +3205,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
+
+ /* Connection-based need to check for termination and startup */
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+- sk->sk_state == TCP_CLOSE)
++ state == TCP_CLOSE)
+ mask |= EPOLLHUP;
+
+ /*
+ * we set writable also when the other side has shut down the
+ * connection. This prevents stuck sockets.
+ */
+- if (unix_writable(sk))
++ if (unix_writable(sk, state))
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
+
+ return mask;
+@@ -3221,12 +3223,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
+ {
+ struct sock *sk = sock->sk, *other;
+ unsigned int writable;
++ unsigned char state;
+ __poll_t mask;
+ u8 shutdown;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
++ state = READ_ONCE(sk->sk_state);
+
+ /* exceptional events? */
+ if (READ_ONCE(sk->sk_err) ||
+@@ -3246,19 +3250,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+- if (sk->sk_type == SOCK_SEQPACKET) {
+- if (sk->sk_state == TCP_CLOSE)
+- mask |= EPOLLHUP;
+- /* connection hasn't started yet? */
+- if (sk->sk_state == TCP_SYN_SENT)
+- return mask;
+- }
++ if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
++ mask |= EPOLLHUP;
+
+ /* No write status requested, avoid expensive OUT tests. */
+ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+ return mask;
+
+- writable = unix_writable(sk);
++ writable = unix_writable(sk, state);
+ if (writable) {
+ unix_state_lock(sk);
+
+--
+2.43.0
+
--- /dev/null
+From 215c5156fe83c962fd00a0ed04640080c5446dcf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:35 -0700
+Subject: af_unix: Annotate data-races around sk->sk_state in UNIX_DIAG.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 0aa3be7b3e1f8f997312cc4705f8165e02806f8f ]
+
+While dumping AF_UNIX sockets via UNIX_DIAG, sk->sk_state is read
+locklessly.
+
+Let's use READ_ONCE() there.
+
+Note that the result could be inconsistent if the socket is dumped
+during the state change. This is common for other SOCK_DIAG and
+similar interfaces.
+
+Fixes: c9da99e6475f ("unix_diag: Fixup RQLEN extension report")
+Fixes: 2aac7a2cb0d9 ("unix_diag: Pending connections IDs NLA")
+Fixes: 45a96b9be6ec ("unix_diag: Dumping all sockets core")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/diag.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/unix/diag.c b/net/unix/diag.c
+index ae39538c5042b..116cf508aea4a 100644
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
+ u32 *buf;
+ int i;
+
+- if (sk->sk_state == TCP_LISTEN) {
++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ attr = nla_reserve(nlskb, UNIX_DIAG_ICONS,
+@@ -103,7 +103,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+ {
+ struct unix_diag_rqlen rql;
+
+- if (sk->sk_state == TCP_LISTEN) {
++ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
+ rql.udiag_rqueue = sk->sk_receive_queue.qlen;
+ rql.udiag_wqueue = sk->sk_max_ack_backlog;
+ } else {
+@@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
+ rep = nlmsg_data(nlh);
+ rep->udiag_family = AF_UNIX;
+ rep->udiag_type = sk->sk_type;
+- rep->udiag_state = sk->sk_state;
++ rep->udiag_state = READ_ONCE(sk->sk_state);
+ rep->pad = 0;
+ rep->udiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, rep->udiag_cookie);
+@@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ sk_for_each(sk, &net->unx.table.buckets[slot]) {
+ if (num < s_num)
+ goto next;
+- if (!(req->udiag_states & (1 << sk->sk_state)))
++ if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state))))
+ goto next;
+ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
+ NETLINK_CB(cb->skb).portid,
+--
+2.43.0
+
--- /dev/null
+From 7d9b471b9edf17dd787fc660c619d4fd24473020 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:27 -0700
+Subject: af_unix: Set sk->sk_state under unix_state_lock() for truly
+ disconnected peer.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 26bfb8b57063f52b867f9b6c8d1742fcb5bd656c ]
+
+When a SOCK_DGRAM socket connect()s to another socket, the both sockets'
+sk->sk_state are changed to TCP_ESTABLISHED so that we can register them
+to BPF SOCKMAP.
+
+When the socket disconnects from the peer by connect(AF_UNSPEC), the state
+is set back to TCP_CLOSE.
+
+Then, the peer's state is also set to TCP_CLOSE, but the update is done
+locklessly and unconditionally.
+
+Let's say socket A connect()ed to B, B connect()ed to C, and A disconnects
+from B.
+
+After the first two connect()s, all three sockets' sk->sk_state are
+TCP_ESTABLISHED:
+
+ $ ss -xa
+ Netid State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess
+ u_dgr ESTAB 0 0 @A 641 * 642
+ u_dgr ESTAB 0 0 @B 642 * 643
+ u_dgr ESTAB 0 0 @C 643 * 0
+
+And after the disconnect, B's state is TCP_CLOSE even though it's still
+connected to C and C's state is TCP_ESTABLISHED.
+
+ $ ss -xa
+ Netid State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess
+ u_dgr UNCONN 0 0 @A 641 * 0
+ u_dgr UNCONN 0 0 @B 642 * 643
+ u_dgr ESTAB 0 0 @C 643 * 0
+
+In this case, we cannot register B to SOCKMAP.
+
+So, when a socket disconnects from the peer, we should not set TCP_CLOSE to
+the peer if the peer is connected to yet another socket, and this must be
+done under unix_state_lock().
+
+Note that we use WRITE_ONCE() for sk->sk_state as there are many lockless
+readers. These data-races will be fixed in the following patches.
+
+Fixes: 83301b5367a9 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 439c531744a27..c0cf7137979c7 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -570,7 +570,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
+ sk_error_report(other);
+ }
+ }
+- other->sk_state = TCP_CLOSE;
+ }
+
+ static void unix_sock_destructor(struct sock *sk)
+@@ -1424,8 +1423,15 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
+
+ unix_state_double_unlock(sk, other);
+
+- if (other != old_peer)
++ if (other != old_peer) {
+ unix_dgram_disconnected(sk, old_peer);
++
++ unix_state_lock(old_peer);
++ if (!unix_peer(old_peer))
++ WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
++ unix_state_unlock(old_peer);
++ }
++
+ sock_put(old_peer);
+ } else {
+ unix_peer(sk) = other;
+--
+2.43.0
+
--- /dev/null
+From 76bb725ce26cf931c09528a5f9924c37b9bb9d1a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:39 -0700
+Subject: af_unix: Use skb_queue_empty_lockless() in unix_release_sock().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 83690b82d228b3570565ebd0b41873933238b97f ]
+
+If the socket type is SOCK_STREAM or SOCK_SEQPACKET, unix_release_sock()
+checks the length of the peer socket's recvq under unix_state_lock().
+
+However, unix_stream_read_generic() calls skb_unlink() after releasing
+the lock. Also, for SOCK_SEQPACKET, __skb_try_recv_datagram() unlinks
+skb without unix_state_lock().
+
+Thus, unix_state_lock() does not protect qlen.
+
+Let's use skb_queue_empty_lockless() in unix_release_sock().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index f0760afad71fe..cbc011ceb89b4 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -631,7 +631,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
+ unix_state_lock(skpair);
+ /* No more writes */
+ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
+- if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
+ WRITE_ONCE(skpair->sk_err, ECONNRESET);
+ unix_state_unlock(skpair);
+ skpair->sk_state_change(skpair);
+--
+2.43.0
+
--- /dev/null
+From abcb17ec9a23b8a8e08fe94b8e511b042e9b0dd9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:40 -0700
+Subject: af_unix: Use skb_queue_len_lockless() in sk_diag_show_rqlen().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 5d915e584d8408211d4567c22685aae8820bfc55 ]
+
+We can dump the socket queue length via UNIX_DIAG by specifying
+UDIAG_SHOW_RQLEN.
+
+If sk->sk_state is TCP_LISTEN, we return the recv queue length,
+but here we do not hold recvq lock.
+
+Let's use skb_queue_len_lockless() in sk_diag_show_rqlen().
+
+Fixes: c9da99e6475f ("unix_diag: Fixup RQLEN extension report")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/diag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/diag.c b/net/unix/diag.c
+index 116cf508aea4a..321336f91a0af 100644
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -104,7 +104,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+ struct unix_diag_rqlen rql;
+
+ if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
+- rql.udiag_rqueue = sk->sk_receive_queue.qlen;
++ rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue);
+ rql.udiag_wqueue = sk->sk_max_ack_backlog;
+ } else {
+ rql.udiag_rqueue = (u32) unix_inq_len(sk);
+--
+2.43.0
+
--- /dev/null
+From b402a8d92bad090ad7ea45a7fa5d81c149ee3f2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 09:52:38 -0700
+Subject: af_unix: Use unix_recvq_full_lockless() in unix_stream_connect().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 45d872f0e65593176d880ec148f41ad7c02e40a7 ]
+
+Once sk->sk_state is changed to TCP_LISTEN, it never changes.
+
+unix_accept() takes advantage of this characteristics; it does not
+hold the listener's unix_state_lock() and only acquires recvq lock
+to pop one skb.
+
+It means unix_state_lock() does not prevent the queue length from
+changing in unix_stream_connect().
+
+Thus, we need to use unix_recvq_full_lockless() to avoid data-race.
+
+Now we remove unix_recvq_full() as no one uses it.
+
+Note that we can remove READ_ONCE() for sk->sk_max_ack_backlog in
+unix_recvq_full_lockless() because of the following reasons:
+
+ (1) For SOCK_DGRAM, it is a written-once field in unix_create1()
+
+ (2) For SOCK_STREAM and SOCK_SEQPACKET, it is changed under the
+ listener's unix_state_lock() in unix_listen(), and we hold
+ the lock in unix_stream_connect()
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 0c217ac17e053..f0760afad71fe 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -221,15 +221,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
+ return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
+ }
+
+-static inline int unix_recvq_full(const struct sock *sk)
+-{
+- return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
+-}
+-
+ static inline int unix_recvq_full_lockless(const struct sock *sk)
+ {
+- return skb_queue_len_lockless(&sk->sk_receive_queue) >
+- READ_ONCE(sk->sk_max_ack_backlog);
++ return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
+ }
+
+ struct sock *unix_peer_get(struct sock *s)
+@@ -1545,7 +1539,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ if (other->sk_shutdown & RCV_SHUTDOWN)
+ goto out_unlock;
+
+- if (unix_recvq_full(other)) {
++ if (unix_recvq_full_lockless(other)) {
+ err = -EAGAIN;
+ if (!timeo)
+ goto out_unlock;
+--
+2.43.0
+
--- /dev/null
+From 6092a21f3f6dda8dad72966bfd716109dc49050e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 17:02:43 -0400
+Subject: ax25: Fix refcount imbalance on inbound connections
+
+From: Lars Kellogg-Stedman <lars@oddbit.com>
+
+[ Upstream commit 3c34fb0bd4a4237592c5ecb5b2e2531900c55774 ]
+
+When releasing a socket in ax25_release(), we call netdev_put() to
+decrease the refcount on the associated ax.25 device. However, the
+execution path for accepting an incoming connection never calls
+netdev_hold(). This imbalance leads to refcount errors, and ultimately
+to kernel crashes.
+
+A typical call trace for the above situation will start with one of the
+following errors:
+
+ refcount_t: decrement hit 0; leaking memory.
+ refcount_t: underflow; use-after-free.
+
+And will then have a trace like:
+
+ Call Trace:
+ <TASK>
+ ? show_regs+0x64/0x70
+ ? __warn+0x83/0x120
+ ? refcount_warn_saturate+0xb2/0x100
+ ? report_bug+0x158/0x190
+ ? prb_read_valid+0x20/0x30
+ ? handle_bug+0x3e/0x70
+ ? exc_invalid_op+0x1c/0x70
+ ? asm_exc_invalid_op+0x1f/0x30
+ ? refcount_warn_saturate+0xb2/0x100
+ ? refcount_warn_saturate+0xb2/0x100
+ ax25_release+0x2ad/0x360
+ __sock_release+0x35/0xa0
+ sock_close+0x19/0x20
+ [...]
+
+On reboot (or any attempt to remove the interface), the kernel gets
+stuck in an infinite loop:
+
+ unregister_netdevice: waiting for ax0 to become free. Usage count = 0
+
+This patch corrects these issues by ensuring that we call netdev_hold()
+and ax25_dev_hold() for new connections in ax25_accept(). This makes the
+logic leading to ax25_accept() match the logic for ax25_bind(): in both
+cases we increment the refcount, which is ultimately decremented in
+ax25_release().
+
+Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()")
+Signed-off-by: Lars Kellogg-Stedman <lars@oddbit.com>
+Tested-by: Duoming Zhou <duoming@zju.edu.cn>
+Tested-by: Dan Cross <crossd@gmail.com>
+Tested-by: Chris Maness <christopher.maness@gmail.com>
+Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/20240529210242.3346844-2-lars@oddbit.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ax25/af_ax25.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
+index 9169efb2f43aa..5fff5930e4deb 100644
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -1378,8 +1378,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+ {
+ struct sk_buff *skb;
+ struct sock *newsk;
++ ax25_dev *ax25_dev;
+ DEFINE_WAIT(wait);
+ struct sock *sk;
++ ax25_cb *ax25;
+ int err = 0;
+
+ if (sock->state != SS_UNCONNECTED)
+@@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+ kfree_skb(skb);
+ sk_acceptq_removed(sk);
+ newsock->state = SS_CONNECTED;
++ ax25 = sk_to_ax25(newsk);
++ ax25_dev = ax25->ax25_dev;
++ netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
++ ax25_dev_hold(ax25_dev);
+
+ out:
+ release_sock(sk);
+--
+2.43.0
+
--- /dev/null
+From d995df1ec95cc67e5ba7085df4fce93c8085dd41 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 May 2024 13:17:33 +0800
+Subject: ax25: Replace kfree() in ax25_dev_free() with ax25_dev_put()
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit 166fcf86cd34e15c7f383eda4642d7a212393008 ]
+
+The object "ax25_dev" is managed by reference counting. Thus it should
+not be directly released by kfree(), replace with ax25_dev_put().
+
+Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs")
+Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/20240530051733.11416-1-duoming@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ax25/ax25_dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
+index c9d55b99a7a57..67ae6b8c52989 100644
+--- a/net/ax25/ax25_dev.c
++++ b/net/ax25/ax25_dev.c
+@@ -193,7 +193,7 @@ void __exit ax25_dev_free(void)
+ list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
+ netdev_put(s->dev, &s->dev_tracker);
+ list_del(&s->list);
+- kfree(s);
++ ax25_dev_put(s);
+ }
+ spin_unlock_bh(&ax25_dev_lock);
+ }
+--
+2.43.0
+
--- /dev/null
+From ddf3a5f5dd289024e4684f1950956ba3658622ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 2 Jun 2024 11:27:03 -0700
+Subject: bpf: Fix a potential use-after-free in bpf_link_free()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+[ Upstream commit 2884dc7d08d98a89d8d65121524bb7533183a63a ]
+
+After commit 1a80dbcb2dba, bpf_link can be freed by
+link->ops->dealloc_deferred, but the code still tests and uses
+link->ops->dealloc afterward, which leads to a use-after-free as
+reported by syzbot. Actually, one of them should be sufficient, so
+just call one of them instead of both. Also add a WARN_ON() in case
+of any problematic implementation.
+
+Fixes: 1a80dbcb2dba ("bpf: support deferring bpf_link dealloc to after RCU grace period")
+Reported-by: syzbot+1989ee16d94720836244@syzkaller.appspotmail.com
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/bpf/20240602182703.207276-1-xiyou.wangcong@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index cb61d8880dbe0..52ffe33356418 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -2985,6 +2985,7 @@ static int bpf_obj_get(const union bpf_attr *attr)
+ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog)
+ {
++ WARN_ON(ops->dealloc && ops->dealloc_deferred);
+ atomic64_set(&link->refcnt, 1);
+ link->type = type;
+ link->id = 0;
+@@ -3043,16 +3044,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+ /* bpf_link_free is guaranteed to be called from process context */
+ static void bpf_link_free(struct bpf_link *link)
+ {
++ const struct bpf_link_ops *ops = link->ops;
+ bool sleepable = false;
+
+ bpf_link_free_id(link->id);
+ if (link->prog) {
+ sleepable = link->prog->sleepable;
+ /* detach BPF program, clean up used resources */
+- link->ops->release(link);
++ ops->release(link);
+ bpf_prog_put(link->prog);
+ }
+- if (link->ops->dealloc_deferred) {
++ if (ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+@@ -3061,9 +3063,8 @@ static void bpf_link_free(struct bpf_link *link)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+- }
+- if (link->ops->dealloc)
+- link->ops->dealloc(link);
++ } else if (ops->dealloc)
++ ops->dealloc(link);
+ }
+
+ static void bpf_link_put_deferred(struct work_struct *work)
+--
+2.43.0
+
--- /dev/null
+From 23a08df6c30c3f00ab2e7d9c80319b110fd276c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 17:00:24 +0200
+Subject: bpf: Set run context for rawtp test_run callback
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit d0d1df8ba18abc57f28fb3bc053b2bf319367f2c ]
+
+syzbot reported crash when rawtp program executed through the
+test_run interface calls bpf_get_attach_cookie helper or any
+other helper that touches task->bpf_ctx pointer.
+
+Setting the run context (task->bpf_ctx pointer) for test_run
+callback.
+
+Fixes: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value")
+Reported-by: syzbot+3ab78ff125b7979e45f9@syzkaller.appspotmail.com
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Closes: https://syzkaller.appspot.com/bug?extid=3ab78ff125b7979e45f9
+Link: https://lore.kernel.org/bpf/20240604150024.359247-1-jolsa@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bpf/test_run.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
+index 61efeadaff8db..4cd29fb490f7c 100644
+--- a/net/bpf/test_run.c
++++ b/net/bpf/test_run.c
+@@ -719,10 +719,16 @@ static void
+ __bpf_prog_test_run_raw_tp(void *data)
+ {
+ struct bpf_raw_tp_test_run_info *info = data;
++ struct bpf_trace_run_ctx run_ctx = {};
++ struct bpf_run_ctx *old_run_ctx;
++
++ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+
+ rcu_read_lock();
+ info->retval = bpf_prog_run(info->prog, info->ctx);
+ rcu_read_unlock();
++
++ bpf_reset_run_ctx(old_run_ctx);
+ }
+
+ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+--
+2.43.0
+
--- /dev/null
+From e2e92ece6fc62b33a603f527825d7c5f08ec5a2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Apr 2024 16:07:58 +0800
+Subject: cpufreq: amd-pstate: Add quirk for the pstate CPPC capabilities
+ missing
+
+From: Perry Yuan <perry.yuan@amd.com>
+
+[ Upstream commit eb8b6c36820214df96e7e86d8614d93f6b028f28 ]
+
+Add quirks table to get CPPC capabilities issue fixed by providing
+correct perf or frequency values while driver loading.
+
+If CPPC capabilities are not defined in the ACPI tables or wrongly
+defined by platform firmware, the driver needs to use quirks to get
+those issues fixed with correct workaround values so that the pstate
+driver can be loaded even though there are CPPC capabilities errors.
+
+The workaround will match the broken BIOS which lacks the CPPC capabilities
+nominal_freq and lowest_freq definitions in the ACPI table.
+
+$ cat /sys/devices/system/cpu/cpu0/acpi_cppc/lowest_freq
+0
+$ cat /sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq
+0
+
+Acked-by: Huang Rui <ray.huang@amd.com>
+Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
+Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+Tested-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
+Signed-off-by: Perry Yuan <perry.yuan@amd.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Stable-dep-of: 779b8a14afde ("cpufreq: amd-pstate: remove global header file")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/amd-pstate.c | 53 ++++++++++++++++++++++++++++++++++--
+ include/linux/amd-pstate.h | 6 ++++
+ 2 files changed, 57 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
+index e8385ca32f1fc..605b037913ff8 100644
+--- a/drivers/cpufreq/amd-pstate.c
++++ b/drivers/cpufreq/amd-pstate.c
+@@ -68,6 +68,7 @@ static struct cpufreq_driver amd_pstate_epp_driver;
+ static int cppc_state = AMD_PSTATE_UNDEFINED;
+ static bool cppc_enabled;
+ static bool amd_pstate_prefcore = true;
++static struct quirk_entry *quirks;
+
+ /*
+ * AMD Energy Preference Performance (EPP)
+@@ -112,6 +113,41 @@ static unsigned int epp_values[] = {
+
+ typedef int (*cppc_mode_transition_fn)(int);
+
++static struct quirk_entry quirk_amd_7k62 = {
++ .nominal_freq = 2600,
++ .lowest_freq = 550,
++};
++
++static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
++{
++ /**
++ * match the broken bios for family 17h processor support CPPC V2
++ * broken BIOS lack of nominal_freq and lowest_freq capabilities
++ * definition in ACPI tables
++ */
++ if (boot_cpu_has(X86_FEATURE_ZEN2)) {
++ quirks = dmi->driver_data;
++ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
++ return 1;
++ }
++
++ return 0;
++}
++
++static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
++ {
++ .callback = dmi_matched_7k62_bios_bug,
++ .ident = "AMD EPYC 7K62",
++ .matches = {
++ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
++ DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
++ },
++ .driver_data = &quirk_amd_7k62,
++ },
++ {}
++};
++MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
++
+ static inline int get_mode_idx_from_str(const char *str, size_t size)
+ {
+ int i;
+@@ -818,8 +854,16 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
+ if (ret)
+ return ret;
+
+- min_freq = cppc_perf.lowest_freq * 1000;
+- nominal_freq = cppc_perf.nominal_freq;
++ if (quirks && quirks->lowest_freq)
++ min_freq = quirks->lowest_freq * 1000;
++ else
++ min_freq = cppc_perf.lowest_freq * 1000;
++
++ if (quirks && quirks->nominal_freq)
++ nominal_freq = quirks->nominal_freq;
++ else
++ nominal_freq = cppc_perf.nominal_freq;
++
+ nominal_perf = READ_ONCE(cpudata->nominal_perf);
+
+ highest_perf = READ_ONCE(cpudata->highest_perf);
+@@ -1664,6 +1708,11 @@ static int __init amd_pstate_init(void)
+ if (cpufreq_get_current_driver())
+ return -EEXIST;
+
++ quirks = NULL;
++
++ /* check if this machine need CPPC quirks */
++ dmi_check_system(amd_pstate_quirks_table);
++
+ switch (cppc_state) {
+ case AMD_PSTATE_UNDEFINED:
+ /* Disable on the following configs by default:
+diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
+index d21838835abda..7b2cbb892fd91 100644
+--- a/include/linux/amd-pstate.h
++++ b/include/linux/amd-pstate.h
+@@ -124,4 +124,10 @@ static const char * const amd_pstate_mode_string[] = {
+ [AMD_PSTATE_GUIDED] = "guided",
+ NULL,
+ };
++
++struct quirk_entry {
++ u32 nominal_freq;
++ u32 lowest_freq;
++};
++
+ #endif /* _LINUX_AMD_PSTATE_H */
+--
+2.43.0
+
--- /dev/null
+From 9ec7ac961a4725f22473bfccf2f9bb90d71f2126 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 14:09:23 +0200
+Subject: cpufreq: amd-pstate: remove global header file
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 779b8a14afde110dd3502566be907289eba72447 ]
+
+When extra warnings are enabled, gcc points out a global variable
+definition in a header:
+
+In file included from drivers/cpufreq/amd-pstate-ut.c:29:
+include/linux/amd-pstate.h:123:27: error: 'amd_pstate_mode_string' defined but not used [-Werror=unused-const-variable=]
+ 123 | static const char * const amd_pstate_mode_string[] = {
+ | ^~~~~~~~~~~~~~~~~~~~~~
+
+This header is only included from two files in the same directory,
+and one of them uses only a single definition from it, so clean it
+up by moving most of the contents into the driver that uses them,
+and making shared bits a local header file.
+
+Fixes: 36c5014e5460 ("cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param()")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS | 1 -
+ drivers/cpufreq/amd-pstate-ut.c | 3 +-
+ drivers/cpufreq/amd-pstate.c | 34 ++++++++++++++++++-
+ .../linux => drivers/cpufreq}/amd-pstate.h | 33 ------------------
+ 4 files changed, 35 insertions(+), 36 deletions(-)
+ rename {include/linux => drivers/cpufreq}/amd-pstate.h (81%)
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 28e20975c26f5..3121709d99e3b 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -1066,7 +1066,6 @@ L: linux-pm@vger.kernel.org
+ S: Supported
+ F: Documentation/admin-guide/pm/amd-pstate.rst
+ F: drivers/cpufreq/amd-pstate*
+-F: include/linux/amd-pstate.h
+ F: tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
+
+ AMD PTDMA DRIVER
+diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
+index f04ae67dda372..fc275d41d51e9 100644
+--- a/drivers/cpufreq/amd-pstate-ut.c
++++ b/drivers/cpufreq/amd-pstate-ut.c
+@@ -26,10 +26,11 @@
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/fs.h>
+-#include <linux/amd-pstate.h>
+
+ #include <acpi/cppc_acpi.h>
+
++#include "amd-pstate.h"
++
+ /*
+ * Abbreviations:
+ * amd_pstate_ut: used as a shortform for AMD P-State unit test.
+diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
+index 605b037913ff8..6c989d859b396 100644
+--- a/drivers/cpufreq/amd-pstate.c
++++ b/drivers/cpufreq/amd-pstate.c
+@@ -36,7 +36,6 @@
+ #include <linux/delay.h>
+ #include <linux/uaccess.h>
+ #include <linux/static_call.h>
+-#include <linux/amd-pstate.h>
+ #include <linux/topology.h>
+
+ #include <acpi/processor.h>
+@@ -46,6 +45,8 @@
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+ #include <asm/cpu_device_id.h>
++
++#include "amd-pstate.h"
+ #include "amd-pstate-trace.h"
+
+ #define AMD_PSTATE_TRANSITION_LATENCY 20000
+@@ -53,6 +54,37 @@
+ #define CPPC_HIGHEST_PERF_PERFORMANCE 196
+ #define CPPC_HIGHEST_PERF_DEFAULT 166
+
++#define AMD_CPPC_EPP_PERFORMANCE 0x00
++#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
++#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
++#define AMD_CPPC_EPP_POWERSAVE 0xFF
++
++/*
++ * enum amd_pstate_mode - driver working mode of amd pstate
++ */
++enum amd_pstate_mode {
++ AMD_PSTATE_UNDEFINED = 0,
++ AMD_PSTATE_DISABLE,
++ AMD_PSTATE_PASSIVE,
++ AMD_PSTATE_ACTIVE,
++ AMD_PSTATE_GUIDED,
++ AMD_PSTATE_MAX,
++};
++
++static const char * const amd_pstate_mode_string[] = {
++ [AMD_PSTATE_UNDEFINED] = "undefined",
++ [AMD_PSTATE_DISABLE] = "disable",
++ [AMD_PSTATE_PASSIVE] = "passive",
++ [AMD_PSTATE_ACTIVE] = "active",
++ [AMD_PSTATE_GUIDED] = "guided",
++ NULL,
++};
++
++struct quirk_entry {
++ u32 nominal_freq;
++ u32 lowest_freq;
++};
++
+ /*
+ * TODO: We need more time to fine tune processors with shared memory solution
+ * with community together.
+diff --git a/include/linux/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
+similarity index 81%
+rename from include/linux/amd-pstate.h
+rename to drivers/cpufreq/amd-pstate.h
+index 7b2cbb892fd91..bc341f35908d7 100644
+--- a/include/linux/amd-pstate.h
++++ b/drivers/cpufreq/amd-pstate.h
+@@ -1,7 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+ /*
+- * linux/include/linux/amd-pstate.h
+- *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Meng Li <li.meng@amd.com>
+@@ -12,11 +10,6 @@
+
+ #include <linux/pm_qos.h>
+
+-#define AMD_CPPC_EPP_PERFORMANCE 0x00
+-#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
+-#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
+-#define AMD_CPPC_EPP_POWERSAVE 0xFF
+-
+ /*********************************************************************
+ * AMD P-state INTERFACE *
+ *********************************************************************/
+@@ -104,30 +97,4 @@ struct amd_cpudata {
+ bool suspended;
+ };
+
+-/*
+- * enum amd_pstate_mode - driver working mode of amd pstate
+- */
+-enum amd_pstate_mode {
+- AMD_PSTATE_UNDEFINED = 0,
+- AMD_PSTATE_DISABLE,
+- AMD_PSTATE_PASSIVE,
+- AMD_PSTATE_ACTIVE,
+- AMD_PSTATE_GUIDED,
+- AMD_PSTATE_MAX,
+-};
+-
+-static const char * const amd_pstate_mode_string[] = {
+- [AMD_PSTATE_UNDEFINED] = "undefined",
+- [AMD_PSTATE_DISABLE] = "disable",
+- [AMD_PSTATE_PASSIVE] = "passive",
+- [AMD_PSTATE_ACTIVE] = "active",
+- [AMD_PSTATE_GUIDED] = "guided",
+- NULL,
+-};
+-
+-struct quirk_entry {
+- u32 nominal_freq;
+- u32 lowest_freq;
+-};
+-
+ #endif /* _LINUX_AMD_PSTATE_H */
+--
+2.43.0
+
--- /dev/null
+From 85ccf4121195a997b0d2b4a170b6ef45c4f9a123 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Apr 2024 16:07:53 +0800
+Subject: cpufreq: amd-pstate: Unify computation of
+ {max,min,nominal,lowest_nonlinear}_freq
+
+From: Perry Yuan <perry.yuan@amd.com>
+
+[ Upstream commit 5547c0ebfc2efdab6ee93a7fd4d9c411ad87013e ]
+
+Currently the amd_get_{min, max, nominal, lowest_nonlinear}_freq()
+helpers computes the values of min_freq, max_freq, nominal_freq and
+lowest_nominal_freq respectively afresh from
+cppc_get_perf_caps(). This is not necessary as there are fields in
+cpudata to cache these values.
+
+To simplify this, add a single helper function named
+amd_pstate_init_freq() which computes all these frequencies at once, and
+caches it in cpudata.
+
+Use the cached values everywhere else in the code.
+
+Acked-by: Huang Rui <ray.huang@amd.com>
+Reviewed-by: Li Meng <li.meng@amd.com>
+Tested-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
+Co-developed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
+Signed-off-by: Perry Yuan <perry.yuan@amd.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Stable-dep-of: 779b8a14afde ("cpufreq: amd-pstate: remove global header file")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/amd-pstate.c | 126 ++++++++++++++++-------------------
+ 1 file changed, 59 insertions(+), 67 deletions(-)
+
+diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
+index e263db0385ab7..e8385ca32f1fc 100644
+--- a/drivers/cpufreq/amd-pstate.c
++++ b/drivers/cpufreq/amd-pstate.c
+@@ -622,74 +622,22 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
+
+ static int amd_get_min_freq(struct amd_cpudata *cpudata)
+ {
+- struct cppc_perf_caps cppc_perf;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- /* Switch to khz */
+- return cppc_perf.lowest_freq * 1000;
++ return READ_ONCE(cpudata->min_freq);
+ }
+
+ static int amd_get_max_freq(struct amd_cpudata *cpudata)
+ {
+- struct cppc_perf_caps cppc_perf;
+- u32 max_perf, max_freq, nominal_freq, nominal_perf;
+- u64 boost_ratio;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- nominal_freq = cppc_perf.nominal_freq;
+- nominal_perf = READ_ONCE(cpudata->nominal_perf);
+- max_perf = READ_ONCE(cpudata->highest_perf);
+-
+- boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
+- nominal_perf);
+-
+- max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
+-
+- /* Switch to khz */
+- return max_freq * 1000;
++ return READ_ONCE(cpudata->max_freq);
+ }
+
+ static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
+ {
+- struct cppc_perf_caps cppc_perf;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- /* Switch to khz */
+- return cppc_perf.nominal_freq * 1000;
++ return READ_ONCE(cpudata->nominal_freq);
+ }
+
+ static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
+ {
+- struct cppc_perf_caps cppc_perf;
+- u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
+- nominal_freq, nominal_perf;
+- u64 lowest_nonlinear_ratio;
+-
+- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+- if (ret)
+- return ret;
+-
+- nominal_freq = cppc_perf.nominal_freq;
+- nominal_perf = READ_ONCE(cpudata->nominal_perf);
+-
+- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
+-
+- lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
+- nominal_perf);
+-
+- lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
+-
+- /* Switch to khz */
+- return lowest_nonlinear_freq * 1000;
++ return READ_ONCE(cpudata->lowest_nonlinear_freq);
+ }
+
+ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
+@@ -844,6 +792,53 @@ static void amd_pstate_update_limits(unsigned int cpu)
+ mutex_unlock(&amd_pstate_driver_lock);
+ }
+
++/**
++ * amd_pstate_init_freq: Initialize the max_freq, min_freq,
++ * nominal_freq and lowest_nonlinear_freq for
++ * the @cpudata object.
++ *
++ * Requires: highest_perf, lowest_perf, nominal_perf and
++ * lowest_nonlinear_perf members of @cpudata to be
++ * initialized.
++ *
++ * Returns 0 on success, non-zero value on failure.
++ */
++static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
++{
++ int ret;
++ u32 min_freq;
++ u32 highest_perf, max_freq;
++ u32 nominal_perf, nominal_freq;
++ u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
++ u32 boost_ratio, lowest_nonlinear_ratio;
++ struct cppc_perf_caps cppc_perf;
++
++
++ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
++ if (ret)
++ return ret;
++
++ min_freq = cppc_perf.lowest_freq * 1000;
++ nominal_freq = cppc_perf.nominal_freq;
++ nominal_perf = READ_ONCE(cpudata->nominal_perf);
++
++ highest_perf = READ_ONCE(cpudata->highest_perf);
++ boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
++ max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
++
++ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
++ lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
++ nominal_perf);
++ lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
++
++ WRITE_ONCE(cpudata->min_freq, min_freq);
++ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
++ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
++ WRITE_ONCE(cpudata->max_freq, max_freq);
++
++ return 0;
++}
++
+ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ {
+ int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+@@ -871,6 +866,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ if (ret)
+ goto free_cpudata1;
+
++ ret = amd_pstate_init_freq(cpudata);
++ if (ret)
++ goto free_cpudata1;
++
+ min_freq = amd_get_min_freq(cpudata);
+ max_freq = amd_get_max_freq(cpudata);
+ nominal_freq = amd_get_nominal_freq(cpudata);
+@@ -912,13 +911,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
+ goto free_cpudata2;
+ }
+
+- /* Initial processor data capability frequencies */
+- cpudata->max_freq = max_freq;
+- cpudata->min_freq = min_freq;
+ cpudata->max_limit_freq = max_freq;
+ cpudata->min_limit_freq = min_freq;
+- cpudata->nominal_freq = nominal_freq;
+- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+ policy->driver_data = cpudata;
+
+@@ -1333,6 +1327,10 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ if (ret)
+ goto free_cpudata1;
+
++ ret = amd_pstate_init_freq(cpudata);
++ if (ret)
++ goto free_cpudata1;
++
+ min_freq = amd_get_min_freq(cpudata);
+ max_freq = amd_get_max_freq(cpudata);
+ nominal_freq = amd_get_nominal_freq(cpudata);
+@@ -1349,12 +1347,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ /* It will be updated by governor */
+ policy->cur = policy->cpuinfo.min_freq;
+
+- /* Initial processor data capability frequencies */
+- cpudata->max_freq = max_freq;
+- cpudata->min_freq = min_freq;
+- cpudata->nominal_freq = nominal_freq;
+- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+-
+ policy->driver_data = cpudata;
+
+ cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
+--
+2.43.0
+
--- /dev/null
+From 8582e60cd1871a086a5a6574ffa90c31ed287bf8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 May 2024 20:47:58 -0700
+Subject: drm/xe: Use ordered WQ for G2H handler
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matthew Brost <matthew.brost@intel.com>
+
+[ Upstream commit 2d9c72f676e6f79a021b74c6c1c88235e7d5b722 ]
+
+System work queues are shared, use a dedicated work queue for G2H
+processing to avoid G2H processing getting block behind system tasks.
+
+Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+Reviewed-by: Francois Dugast <francois.dugast@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240506034758.3697397-1-matthew.brost@intel.com
+(cherry picked from commit 50aec9665e0babd62b9eee4e613d9a1ef8d2b7de)
+Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
+index 8bbfa45798e2e..6ac86936faaf9 100644
+--- a/drivers/gpu/drm/xe/xe_guc_ct.c
++++ b/drivers/gpu/drm/xe/xe_guc_ct.c
+@@ -146,6 +146,10 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
+
+ xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
+
++ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
++ if (!ct->g2h_wq)
++ return -ENOMEM;
++
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fenced_actions);
+ ct->fence_seqno = 0;
+--
+2.43.0
+
--- /dev/null
+From d2f51b24d9c1f23a52d5b1b8047c10fb347a8789 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 01:14:28 -0400
+Subject: eventfs: Update all the eventfs_inodes from the events descriptor
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 340f0c7067a95281ad13734f8225f49c6cf52067 ]
+
+The change to update the permissions of the eventfs_inode had the
+misconception that using the tracefs_inode would find all the
+eventfs_inodes that have been updated and reset them on remount.
+The problem with this approach is that the eventfs_inodes are freed when
+they are no longer used (basically the reason the eventfs system exists).
+When they are freed, the updated eventfs_inodes are not reset on a remount
+because their tracefs_inodes have been freed.
+
+Instead, since the events directory eventfs_inode always has a
+tracefs_inode pointing to it (it is not freed when finished), and the
+events directory has a link to all its children, have the
+eventfs_remount() function only operate on the events eventfs_inode and
+have it descend into its children updating their uid and gids.
+
+Link: https://lore.kernel.org/all/CAK7LNARXgaWw3kH9JgrnH4vK6fr8LDkNKf3wq8NhMWJrVwJyVQ@mail.gmail.com/
+Link: https://lore.kernel.org/linux-trace-kernel/20240523051539.754424703@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: baa23a8d4360d ("tracefs: Reset permissions on remount if permissions are options")
+Reported-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/tracefs/event_inode.c | 51 ++++++++++++++++++++++++++++++----------
+ 1 file changed, 39 insertions(+), 12 deletions(-)
+
+diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
+index 55a40a730b10c..129d0f54ba62f 100644
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -305,33 +305,60 @@ static const struct file_operations eventfs_file_operations = {
+ .llseek = generic_file_llseek,
+ };
+
+-/*
+- * On a remount of tracefs, if UID or GID options are set, then
+- * the mount point inode permissions should be used.
+- * Reset the saved permission flags appropriately.
+- */
+-void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid)
++static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid,
++ bool update_gid, kgid_t gid, int level)
+ {
+- struct eventfs_inode *ei = ti->private;
++ struct eventfs_inode *ei_child;
+
+- if (!ei)
++ /* Update events/<system>/<event> */
++ if (WARN_ON_ONCE(level > 3))
+ return;
+
+- if (update_uid)
++ if (update_uid) {
+ ei->attr.mode &= ~EVENTFS_SAVE_UID;
++ ei->attr.uid = uid;
++ }
+
+- if (update_gid)
++ if (update_gid) {
+ ei->attr.mode &= ~EVENTFS_SAVE_GID;
++ ei->attr.gid = gid;
++ }
++
++ list_for_each_entry(ei_child, &ei->children, list) {
++ eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1);
++ }
+
+ if (!ei->entry_attrs)
+ return;
+
+ for (int i = 0; i < ei->nr_entries; i++) {
+- if (update_uid)
++ if (update_uid) {
+ ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID;
+- if (update_gid)
++ ei->entry_attrs[i].uid = uid;
++ }
++ if (update_gid) {
+ ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID;
++ ei->entry_attrs[i].gid = gid;
++ }
+ }
++
++}
++
++/*
++ * On a remount of tracefs, if UID or GID options are set, then
++ * the mount point inode permissions should be used.
++ * Reset the saved permission flags appropriately.
++ */
++void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid)
++{
++ struct eventfs_inode *ei = ti->private;
++
++ /* Only the events directory does the updates */
++ if (!ei || !ei->is_events || ei->is_freed)
++ return;
++
++ eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid,
++ update_gid, ti->vfs_inode.i_gid, 0);
+ }
+
+ /* Return the evenfs_inode of the "events" directory */
+--
+2.43.0
+
--- /dev/null
+From b00bc71eed63b7e71284b41100d766fcd1285162 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Mar 2024 19:33:17 +0800
+Subject: ext4: avoid overflow when setting values via sysfs
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 9e8e819f8f272c4e5dcd0bd6c7450e36481ed139 ]
+
+When setting values of type unsigned int through sysfs, we use kstrtoul()
+to parse it and then truncate part of it as the final set value, when the
+set value is greater than UINT_MAX, the set value will not match what we
+see because of the truncation. As follows:
+
+ $ echo 4294967296 > /sys/fs/ext4/sda/mb_max_linear_groups
+ $ cat /sys/fs/ext4/sda/mb_max_linear_groups
+ 0
+
+So we use kstrtouint() to parse the attr_pointer_ui type to avoid the
+inconsistency described above. In addition, a judgment is added to avoid
+setting s_resv_clusters less than 0.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240319113325.3110393-2-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 13df4d44a3aa ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/sysfs.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index 6d332dff79ddc..ca820620b9742 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -104,7 +104,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
+ int ret;
+
+ ret = kstrtoull(skip_spaces(buf), 0, &val);
+- if (ret || val >= clusters)
++ if (ret || val >= clusters || (s64)val < 0)
+ return -EINVAL;
+
+ atomic64_set(&sbi->s_resv_clusters, val);
+@@ -451,7 +451,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
+ s_kobj);
+ struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
+ void *ptr = calc_ptr(a, sbi);
+- unsigned long t;
++ unsigned int t;
++ unsigned long lt;
+ int ret;
+
+ switch (a->attr_id) {
+@@ -460,7 +461,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
+ case attr_pointer_ui:
+ if (!ptr)
+ return 0;
+- ret = kstrtoul(skip_spaces(buf), 0, &t);
++ ret = kstrtouint(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+@@ -471,10 +472,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
+ case attr_pointer_ul:
+ if (!ptr)
+ return 0;
+- ret = kstrtoul(skip_spaces(buf), 0, &t);
++	ret = kstrtoul(skip_spaces(buf), 0, &lt);
+ if (ret)
+ return ret;
+- *((unsigned long *) ptr) = t;
++ *((unsigned long *) ptr) = lt;
+ return len;
+ case attr_inode_readahead:
+ return inode_readahead_blks_store(sbi, buf, len);
+--
+2.43.0
+
--- /dev/null
+From 84cee2d2394a43766dd2990edac8a4a05817ef7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Mar 2024 19:33:20 +0800
+Subject: ext4: fix slab-out-of-bounds in
+ ext4_mb_find_good_group_avg_frag_lists()
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 13df4d44a3aaabe61cd01d277b6ee23ead2a5206 ]
+
+We can trigger a slab-out-of-bounds with the following commands:
+
+ mkfs.ext4 -F /dev/$disk 10G
+ mount /dev/$disk /tmp/test
+ echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc
+ echo test > /tmp/test/file && sync
+
+==================================================================
+BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
+Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11
+CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521
+Call Trace:
+ dump_stack_lvl+0x2c/0x50
+ kasan_report+0xb6/0xf0
+ ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
+ ext4_mb_regular_allocator+0x19e9/0x2370 [ext4]
+ ext4_mb_new_blocks+0x88a/0x1370 [ext4]
+ ext4_ext_map_blocks+0x14f7/0x2390 [ext4]
+ ext4_map_blocks+0x569/0xea0 [ext4]
+ ext4_do_writepages+0x10f6/0x1bc0 [ext4]
+[...]
+==================================================================
+
+The flow of issue triggering is as follows:
+
+// Set s_mb_group_prealloc to 2147483647 via sysfs
+ext4_mb_new_blocks
+ ext4_mb_normalize_request
+ ext4_mb_normalize_group_request
+ ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc
+ ext4_mb_regular_allocator
+ ext4_mb_choose_next_group
+ ext4_mb_choose_next_group_best_avail
+ mb_avg_fragment_size_order
+ order = fls(len) - 2 = 29
+ ext4_mb_find_good_group_avg_frag_lists
+ frag_list = &sbi->s_mb_avg_fragment_size[order]
+ if (list_empty(frag_list)) // Trigger SOOB!
+
+At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
+but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
+to be triggered by an attempt to access an element at index 29.
+
+Add a new attr_id attr_clusters_in_group with values in the range
+[0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
+that type to fix the issue. In addition avoid returning an order
+from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
+and reduce some useless loops.
+
+Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
+CC: stable@vger.kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240319113325.3110393-5-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 4 ++++
+ fs/ext4/sysfs.c | 13 ++++++++++++-
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 714f83632e3f9..66b5a68b0254e 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
+ return 0;
+ if (order == MB_NUM_ORDERS(sb))
+ order--;
++ if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
++ order = MB_NUM_ORDERS(sb) - 1;
+ return order;
+ }
+
+@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
+ * goal length.
+ */
+ order = fls(ac->ac_g_ex.fe_len) - 1;
++ if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
++ order = MB_NUM_ORDERS(ac->ac_sb);
+ min_order = order - sbi->s_mb_best_avail_max_trim_order;
+ if (min_order < 0)
+ min_order = 0;
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index 295ea9a32de91..ca66e33f61815 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -29,6 +29,7 @@ typedef enum {
+ attr_trigger_test_error,
+ attr_first_error_time,
+ attr_last_error_time,
++ attr_clusters_in_group,
+ attr_feature,
+ attr_pointer_ui,
+ attr_pointer_ul,
+@@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
+
+ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
+ ext4_sb_info, s_inode_readahead_blks);
++EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
++ ext4_sb_info, s_mb_group_prealloc);
+ EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
+ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
+ EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
+-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+ EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
+ EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
+ EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
+@@ -376,6 +378,7 @@ static ssize_t ext4_generic_attr_show(struct ext4_attr *a,
+
+ switch (a->attr_id) {
+ case attr_inode_readahead:
++ case attr_clusters_in_group:
+ case attr_pointer_ui:
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
+@@ -459,6 +462,14 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
+ else
+ *((unsigned int *) ptr) = t;
+ return len;
++ case attr_clusters_in_group:
++ ret = kstrtouint(skip_spaces(buf), 0, &t);
++ if (ret)
++ return ret;
++ if (t > sbi->s_clusters_per_group)
++ return -EINVAL;
++ *((unsigned int *) ptr) = t;
++ return len;
+ case attr_pointer_ul:
+ if (!ptr)
+ return 0;
+--
+2.43.0
+
--- /dev/null
+From e8509b358a91de4cab5b87f20011cd3fa492cf01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Mar 2024 19:33:19 +0800
+Subject: ext4: refactor out ext4_generic_attr_show()
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 57341fe3179c7694c92dcf99e7f836cee4c800dd ]
+
+Refactor out the function ext4_generic_attr_show() to handle the reading
+of values of various common types, with no functional changes.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20240319113325.3110393-4-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 13df4d44a3aa ("ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/sysfs.c | 74 +++++++++++++++++++++----------------------------
+ 1 file changed, 32 insertions(+), 42 deletions(-)
+
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index ca820620b9742..295ea9a32de91 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -366,13 +366,42 @@ static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
+ #define print_tstamp(buf, es, tstamp) \
+ __print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
+
++static ssize_t ext4_generic_attr_show(struct ext4_attr *a,
++ struct ext4_sb_info *sbi, char *buf)
++{
++ void *ptr = calc_ptr(a, sbi);
++
++ if (!ptr)
++ return 0;
++
++ switch (a->attr_id) {
++ case attr_inode_readahead:
++ case attr_pointer_ui:
++ if (a->attr_ptr == ptr_ext4_super_block_offset)
++ return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
++ return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr));
++ case attr_pointer_ul:
++ return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr));
++ case attr_pointer_u8:
++ return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr));
++ case attr_pointer_u64:
++ if (a->attr_ptr == ptr_ext4_super_block_offset)
++ return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr));
++ return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr));
++ case attr_pointer_string:
++ return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr);
++ case attr_pointer_atomic:
++ return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr));
++ }
++ return 0;
++}
++
+ static ssize_t ext4_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+ {
+ struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
+ s_kobj);
+ struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
+- void *ptr = calc_ptr(a, sbi);
+
+ switch (a->attr_id) {
+ case attr_delayed_allocation_blocks:
+@@ -391,45 +420,6 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
+ return sysfs_emit(buf, "%llu\n",
+ (unsigned long long)
+ percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
+- case attr_inode_readahead:
+- case attr_pointer_ui:
+- if (!ptr)
+- return 0;
+- if (a->attr_ptr == ptr_ext4_super_block_offset)
+- return sysfs_emit(buf, "%u\n",
+- le32_to_cpup(ptr));
+- else
+- return sysfs_emit(buf, "%u\n",
+- *((unsigned int *) ptr));
+- case attr_pointer_ul:
+- if (!ptr)
+- return 0;
+- return sysfs_emit(buf, "%lu\n",
+- *((unsigned long *) ptr));
+- case attr_pointer_u8:
+- if (!ptr)
+- return 0;
+- return sysfs_emit(buf, "%u\n",
+- *((unsigned char *) ptr));
+- case attr_pointer_u64:
+- if (!ptr)
+- return 0;
+- if (a->attr_ptr == ptr_ext4_super_block_offset)
+- return sysfs_emit(buf, "%llu\n",
+- le64_to_cpup(ptr));
+- else
+- return sysfs_emit(buf, "%llu\n",
+- *((unsigned long long *) ptr));
+- case attr_pointer_string:
+- if (!ptr)
+- return 0;
+- return sysfs_emit(buf, "%.*s\n", a->attr_size,
+- (char *) ptr);
+- case attr_pointer_atomic:
+- if (!ptr)
+- return 0;
+- return sysfs_emit(buf, "%d\n",
+- atomic_read((atomic_t *) ptr));
+ case attr_feature:
+ return sysfs_emit(buf, "supported\n");
+ case attr_first_error_time:
+@@ -438,9 +428,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
+ return print_tstamp(buf, sbi->s_es, s_last_error_time);
+ case attr_journal_task:
+ return journal_task_show(sbi, buf);
++ default:
++ return ext4_generic_attr_show(a, sbi, buf);
+ }
+-
+- return 0;
+ }
+
+ static ssize_t ext4_attr_store(struct kobject *kobj,
+--
+2.43.0
+
--- /dev/null
+From f7478a22b257398bffabd351218faffb76f80439 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:33 -0700
+Subject: ice: add flag to distinguish reset from .ndo_bpf in XDP rings config
+
+From: Larysa Zaremba <larysa.zaremba@intel.com>
+
+[ Upstream commit 744d197162c2070a6045a71e2666ed93a57cc65d ]
+
+Commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
+has placed ice_vsi_free_q_vectors() after ice_destroy_xdp_rings() in
+the rebuild process. The behaviour of the XDP rings config functions is
+context-dependent, so the change of order has led to
+ice_destroy_xdp_rings() doing additional work and removing XDP prog, when
+it was supposed to be preserved.
+
+Also, dependency on the PF state reset flags creates an additional,
+fortunately less common problem:
+
+* PFR is requested e.g. by tx_timeout handler
+* .ndo_bpf() is asked to delete the program, calls ice_destroy_xdp_rings(),
+ but reset flag is set, so rings are destroyed without deleting the
+ program
+* ice_vsi_rebuild tries to delete non-existent XDP rings, because the
+ program is still on the VSI
+* system crashes
+
+With a similar race, when requested to attach a program,
+ice_prepare_xdp_rings() can actually skip setting the program in the VSI
+and nevertheless report success.
+
+Instead of reverting to the old order of function calls, add an enum
+argument to both ice_prepare_xdp_rings() and ice_destroy_xdp_rings() in
+order to distinguish between calls from rebuild and .ndo_bpf().
+
+Fixes: efc2214b6047 ("ice: Add support for XDP")
+Reviewed-by: Igor Bagnucki <igor.bagnucki@intel.com>
+Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-4-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice.h | 11 +++++++++--
+ drivers/net/ethernet/intel/ice/ice_lib.c | 5 +++--
+ drivers/net/ethernet/intel/ice/ice_main.c | 22 ++++++++++++----------
+ 3 files changed, 24 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
+index ca9b56d625841..a3286964d6c31 100644
+--- a/drivers/net/ethernet/intel/ice/ice.h
++++ b/drivers/net/ethernet/intel/ice/ice.h
+@@ -932,9 +932,16 @@ int ice_down(struct ice_vsi *vsi);
+ int ice_down_up(struct ice_vsi *vsi);
+ int ice_vsi_cfg_lan(struct ice_vsi *vsi);
+ struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
++
++enum ice_xdp_cfg {
++ ICE_XDP_CFG_FULL, /* Fully apply new config in .ndo_bpf() */
++ ICE_XDP_CFG_PART, /* Save/use part of config in VSI rebuild */
++};
++
+ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
+-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+-int ice_destroy_xdp_rings(struct ice_vsi *vsi);
++int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
++ enum ice_xdp_cfg cfg_type);
++int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
+ int
+ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 7d401a4dc4513..5de7c50b439e1 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -2334,7 +2334,8 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
+ ret = ice_vsi_determine_xdp_res(vsi);
+ if (ret)
+ goto unroll_vector_base;
+- ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
++ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog,
++ ICE_XDP_CFG_PART);
+ if (ret)
+ goto unroll_vector_base;
+ }
+@@ -2485,7 +2486,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
+ /* return value check can be skipped here, it always returns
+ * 0 if reset is in progress
+ */
+- ice_destroy_xdp_rings(vsi);
++ ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART);
+
+ ice_vsi_clear_rings(vsi);
+ ice_vsi_free_q_vectors(vsi);
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 33a164fa325ac..b53fe27dbed7d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -2674,10 +2674,12 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
++ * @cfg_type: create from scratch or restore the existing configuration
+ *
+ * Return 0 on success and negative value on error
+ */
+-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
++int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
++ enum ice_xdp_cfg cfg_type)
+ {
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int xdp_rings_rem = vsi->num_xdp_txq;
+@@ -2753,7 +2755,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+ */
+- if (ice_is_reset_in_progress(pf->state))
++ if (cfg_type == ICE_XDP_CFG_PART)
+ return 0;
+
+ /* tell the Tx scheduler that right now we have
+@@ -2805,22 +2807,21 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+ /**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
++ * @cfg_type: disable XDP permanently or allow it to be restored later
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+-int ice_destroy_xdp_rings(struct ice_vsi *vsi)
++int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
+ {
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ int i, v_idx;
+
+ /* q_vectors are freed in reset path so there's no point in detaching
+- * rings; in case of rebuild being triggered not from reset bits
+- * in pf->state won't be set, so additionally check first q_vector
+- * against NULL
++ * rings
+ */
+- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
++ if (cfg_type == ICE_XDP_CFG_PART)
+ goto free_qmap;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+@@ -2861,7 +2862,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+ if (static_key_enabled(&ice_xdp_locking_key))
+ static_branch_dec(&ice_xdp_locking_key);
+
+- if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
++ if (cfg_type == ICE_XDP_CFG_PART)
+ return 0;
+
+ ice_vsi_assign_bpf_prog(vsi, NULL);
+@@ -2972,7 +2973,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ if (xdp_ring_err) {
+ NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+ } else {
+- xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
++ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
++ ICE_XDP_CFG_FULL);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+ }
+@@ -2983,7 +2985,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
+ } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_features_clear_redirect_target(vsi->netdev);
+- xdp_ring_err = ice_destroy_xdp_rings(vsi);
++ xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
+ /* reallocate Rx queues that were used for zero-copy */
+--
+2.43.0
+
--- /dev/null
+From bc48f47608091042aa9550ecee0332179d1668c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:30 -0700
+Subject: ice: fix iteration of TLVs in Preserved Fields Area
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit 03e4a092be8ce3de7c1baa7ae14e68b64e3ea644 ]
+
+The ice_get_pfa_module_tlv() function iterates over the Type-Length-Value
+structures in the Preserved Fields Area (PFA) of the NVM. This is used by
+the driver to access data such as the Part Board Assembly identifier.
+
+The function uses simple logic to iterate over the PFA. First, the pointer
+to the PFA in the NVM is read. Then the total length of the PFA is read
+from the first word.
+
+A pointer to the first TLV is initialized, and a simple loop iterates over
+each TLV. The pointer is moved forward through the NVM until it exceeds the
+PFA area.
+
+The logic seems sound, but it is missing a key detail. The Preserved
+Fields Area length includes one additional final word. This is documented
+in the device data sheet as a dummy word which contains 0xFFFF. All NVMs
+have this extra word.
+
+If the driver tries to scan for a TLV that is not in the PFA, it will read
+past the size of the PFA. It reads and interprets the last dummy word of
+the PFA as a TLV with type 0xFFFF. It then reads the word following the PFA
+as a length.
+
+The PFA resides within the Shadow RAM portion of the NVM, which is
+relatively small. All of its offsets are within a 16-bit size. The PFA
+pointer and TLV pointer are stored by the driver as 16-bit values.
+
+In almost all cases, the word following the PFA will be such that
+interpreting it as a length will result in 16-bit arithmetic overflow. Once
+overflowed, the new next_tlv value is now below the maximum offset of the
+PFA. Thus, the driver will continue to iterate the data as TLVs. In the
+worst case, the driver hits on a sequence of reads which loop back to
+reading the same offsets in an endless loop.
+
+To fix this, we need to correct the loop iteration check to account for
+this extra word at the end of the PFA. This alone is sufficient to resolve
+the known cases of this issue in the field. However, it is plausible that
+an NVM could be misconfigured or have corrupt data which results in the
+same kind of overflow. Protect against this by using check_add_overflow
+when calculating both the maximum offset of the TLVs, and when calculating
+the next_tlv offset at the end of each loop iteration. This ensures that
+the driver will not get stuck in an infinite loop when scanning the PFA.
+
+Fixes: e961b679fb0b ("ice: add board identifier info to devlink .info_get")
+Co-developed-by: Paul Greenwalt <paul.greenwalt@intel.com>
+Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-1-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_nvm.c | 28 ++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
+index d4e05d2cb30c4..a0ad950cc76d9 100644
+--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
++++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
+@@ -441,8 +441,7 @@ int
+ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type)
+ {
+- u16 pfa_len, pfa_ptr;
+- u16 next_tlv;
++ u16 pfa_len, pfa_ptr, next_tlv, max_tlv;
+ int status;
+
+ status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+@@ -455,11 +454,23 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+ return status;
+ }
++
++ /* The Preserved Fields Area contains a sequence of Type-Length-Value
++ * structures which define its contents. The PFA length includes all
++ * of the TLVs, plus the initial length word itself, *and* one final
++ * word at the end after all of the TLVs.
++ */
++ if (check_add_overflow(pfa_ptr, pfa_len - 1, &max_tlv)) {
++ dev_warn(ice_hw_to_dev(hw), "PFA starts at offset %u. PFA length of %u caused 16-bit arithmetic overflow.\n",
++ pfa_ptr, pfa_len);
++ return -EINVAL;
++ }
++
+ /* Starting with first TLV after PFA length, iterate through the list
+ * of TLVs to find the requested one.
+ */
+ next_tlv = pfa_ptr + 1;
+- while (next_tlv < pfa_ptr + pfa_len) {
++ while (next_tlv < max_tlv) {
+ u16 tlv_sub_module_type;
+ u16 tlv_len;
+
+@@ -483,10 +494,13 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ }
+ return -EINVAL;
+ }
+- /* Check next TLV, i.e. current TLV pointer + length + 2 words
+- * (for current TLV's type and length)
+- */
+- next_tlv = next_tlv + tlv_len + 2;
++
++ if (check_add_overflow(next_tlv, 2, &next_tlv) ||
++ check_add_overflow(next_tlv, tlv_len, &next_tlv)) {
++ dev_warn(ice_hw_to_dev(hw), "TLV of type %u and length 0x%04x caused 16-bit arithmetic overflow. The PFA starts at 0x%04x and has length of 0x%04x\n",
++ tlv_sub_module_type, tlv_len, pfa_ptr, pfa_len);
++ return -EINVAL;
++ }
+ }
+ /* Module does not exist */
+ return -ENOENT;
+--
+2.43.0
+
--- /dev/null
+From fdc36508569938653ba14eececb83101077dfecf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:31 -0700
+Subject: ice: fix reads from NVM Shadow RAM on E830 and E825-C devices
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit cfa747a66e5da34793ac08c26b814e7709613fab ]
+
+The ice driver reads data from the Shadow RAM portion of the NVM during
+initialization, including data used to identify the NVM image and device,
+such as the ETRACK ID used to populate devlink dev info fw.bundle.
+
+Currently it is using a fixed offset defined by ICE_CSS_HEADER_LENGTH to
+compute the appropriate offset. This worked fine for E810 and E822 devices
+which both have CSS header length of 330 words.
+
+Other devices, including both E825-C and E830 devices have different sizes
+for their CSS header. The use of a hard coded value results in the driver
+reading from the wrong block in the NVM when attempting to access the
+Shadow RAM copy. This results in the driver reporting the fw.bundle as 0x0
+in both the devlink dev info and ethtool -i output.
+
+The first E830 support was introduced by commit ba20ecb1d1bb ("ice: Hook up
+4 E830 devices by adding their IDs") and the first E825-C support was
+introducted by commit f64e18944233 ("ice: introduce new E825C devices
+family")
+
+The NVM actually contains the CSS header length embedded in it. Remove the
+hard coded value and replace it with logic to read the length from the NVM
+directly. This is more resilient against all existing and future hardware,
+vs looking up the expected values from a table. It ensures the driver will
+read from the appropriate place when determining the ETRACK ID value used
+for populating the fw.bundle_id and for reporting in ethtool -i.
+
+The CSS header length for both the active and inactive flash bank is stored
+in the ice_bank_info structure to avoid unnecessary duplicate work when
+accessing multiple words of the Shadow RAM. Both banks are read in the
+unlikely event that the header length is different for the NVM in the
+inactive bank, rather than being different only by the overall device
+family.
+
+Fixes: ba20ecb1d1bb ("ice: Hook up 4 E830 devices by adding their IDs")
+Co-developed-by: Paul Greenwalt <paul.greenwalt@intel.com>
+Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-2-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_nvm.c | 88 ++++++++++++++++++++++-
+ drivers/net/ethernet/intel/ice/ice_type.h | 14 ++--
+ 2 files changed, 93 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
+index a0ad950cc76d9..8510a02afedcc 100644
+--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
++++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
+@@ -375,11 +375,25 @@ ice_read_nvm_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u1
+ *
+ * Read the specified word from the copy of the Shadow RAM found in the
+ * specified NVM module.
++ *
++ * Note that the Shadow RAM copy is always located after the CSS header, and
++ * is aligned to 64-byte (32-word) offsets.
+ */
+ static int
+ ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+ {
+- return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data);
++ u32 sr_copy;
++
++ switch (bank) {
++ case ICE_ACTIVE_FLASH_BANK:
++ sr_copy = roundup(hw->flash.banks.active_css_hdr_len, 32);
++ break;
++ case ICE_INACTIVE_FLASH_BANK:
++ sr_copy = roundup(hw->flash.banks.inactive_css_hdr_len, 32);
++ break;
++ }
++
++ return ice_read_nvm_module(hw, bank, sr_copy + offset, data);
+ }
+
+ /**
+@@ -1024,6 +1038,72 @@ static int ice_determine_active_flash_banks(struct ice_hw *hw)
+ return 0;
+ }
+
++/**
++ * ice_get_nvm_css_hdr_len - Read the CSS header length from the NVM CSS header
++ * @hw: pointer to the HW struct
++ * @bank: whether to read from the active or inactive flash bank
++ * @hdr_len: storage for header length in words
++ *
++ * Read the CSS header length from the NVM CSS header and add the Authentication
++ * header size, and then convert to words.
++ *
++ * Return: zero on success, or a negative error code on failure.
++ */
++static int
++ice_get_nvm_css_hdr_len(struct ice_hw *hw, enum ice_bank_select bank,
++ u32 *hdr_len)
++{
++ u16 hdr_len_l, hdr_len_h;
++ u32 hdr_len_dword;
++ int status;
++
++ status = ice_read_nvm_module(hw, bank, ICE_NVM_CSS_HDR_LEN_L,
++ &hdr_len_l);
++ if (status)
++ return status;
++
++ status = ice_read_nvm_module(hw, bank, ICE_NVM_CSS_HDR_LEN_H,
++ &hdr_len_h);
++ if (status)
++ return status;
++
++ /* CSS header length is in DWORD, so convert to words and add
++ * authentication header size
++ */
++ hdr_len_dword = hdr_len_h << 16 | hdr_len_l;
++ *hdr_len = (hdr_len_dword * 2) + ICE_NVM_AUTH_HEADER_LEN;
++
++ return 0;
++}
++
++/**
++ * ice_determine_css_hdr_len - Discover CSS header length for the device
++ * @hw: pointer to the HW struct
++ *
++ * Determine the size of the CSS header at the start of the NVM module. This
++ * is useful for locating the Shadow RAM copy in the NVM, as the Shadow RAM is
++ * always located just after the CSS header.
++ *
++ * Return: zero on success, or a negative error code on failure.
++ */
++static int ice_determine_css_hdr_len(struct ice_hw *hw)
++{
++ struct ice_bank_info *banks = &hw->flash.banks;
++ int status;
++
++ status = ice_get_nvm_css_hdr_len(hw, ICE_ACTIVE_FLASH_BANK,
++ &banks->active_css_hdr_len);
++ if (status)
++ return status;
++
++ status = ice_get_nvm_css_hdr_len(hw, ICE_INACTIVE_FLASH_BANK,
++ &banks->inactive_css_hdr_len);
++ if (status)
++ return status;
++
++ return 0;
++}
++
+ /**
+ * ice_init_nvm - initializes NVM setting
+ * @hw: pointer to the HW struct
+@@ -1070,6 +1150,12 @@ int ice_init_nvm(struct ice_hw *hw)
+ return status;
+ }
+
++ status = ice_determine_css_hdr_len(hw);
++ if (status) {
++ ice_debug(hw, ICE_DBG_NVM, "Failed to determine Shadow RAM copy offsets.\n");
++ return status;
++ }
++
+ status = ice_get_nvm_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->nvm);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n");
+diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
+index 9ff92dba58236..0aacd0d050b8e 100644
+--- a/drivers/net/ethernet/intel/ice/ice_type.h
++++ b/drivers/net/ethernet/intel/ice/ice_type.h
+@@ -481,6 +481,8 @@ struct ice_bank_info {
+ u32 orom_size; /* Size of OROM bank */
+ u32 netlist_ptr; /* Pointer to 1st Netlist bank */
+ u32 netlist_size; /* Size of Netlist bank */
++ u32 active_css_hdr_len; /* Active CSS header length */
++ u32 inactive_css_hdr_len; /* Inactive CSS header length */
+ enum ice_flash_bank nvm_bank; /* Active NVM bank */
+ enum ice_flash_bank orom_bank; /* Active OROM bank */
+ enum ice_flash_bank netlist_bank; /* Active Netlist bank */
+@@ -1084,17 +1086,13 @@ struct ice_aq_get_set_rss_lut_params {
+ #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800
+
+ /* CSS Header words */
++#define ICE_NVM_CSS_HDR_LEN_L 0x02
++#define ICE_NVM_CSS_HDR_LEN_H 0x03
+ #define ICE_NVM_CSS_SREV_L 0x14
+ #define ICE_NVM_CSS_SREV_H 0x15
+
+-/* Length of CSS header section in words */
+-#define ICE_CSS_HEADER_LENGTH 330
+-
+-/* Offset of Shadow RAM copy in the NVM bank area. */
+-#define ICE_NVM_SR_COPY_WORD_OFFSET roundup(ICE_CSS_HEADER_LENGTH, 32)
+-
+-/* Size in bytes of Option ROM trailer */
+-#define ICE_NVM_OROM_TRAILER_LENGTH (2 * ICE_CSS_HEADER_LENGTH)
++/* Length of Authentication header section in words */
++#define ICE_NVM_AUTH_HEADER_LEN 0x08
+
+ /* The Link Topology Netlist section is stored as a series of words. It is
+ * stored in the NVM as a TLV, with the first two words containing the type
+--
+2.43.0
+
--- /dev/null
+From 8e1a010cc713602931c65f2493da2bd5209353fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:34 -0700
+Subject: ice: map XDP queues to vectors in ice_vsi_map_rings_to_vectors()
+
+From: Larysa Zaremba <larysa.zaremba@intel.com>
+
+[ Upstream commit f3df4044254c98128890b512bf19cc05588f1fe5 ]
+
+ice_pf_dcb_recfg() re-maps queues to vectors with
+ice_vsi_map_rings_to_vectors(), which does not restore the previous
+state for XDP queues. This leads to no AF_XDP traffic after rebuild.
+
+Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
+Also, move the code around, so XDP queues are mapped independently only
+through .ndo_bpf().
+
+Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-5-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice.h | 1 +
+ drivers/net/ethernet/intel/ice/ice_base.c | 3 +
+ drivers/net/ethernet/intel/ice/ice_lib.c | 14 ++--
+ drivers/net/ethernet/intel/ice/ice_main.c | 96 ++++++++++++++---------
+ 4 files changed, 68 insertions(+), 46 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
+index a3286964d6c31..8e40f26aa5060 100644
+--- a/drivers/net/ethernet/intel/ice/ice.h
++++ b/drivers/net/ethernet/intel/ice/ice.h
+@@ -942,6 +942,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
+ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ enum ice_xdp_cfg cfg_type);
+ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
++void ice_map_xdp_rings(struct ice_vsi *vsi);
+ int
+ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
+index a545a7917e4fc..9d23a436d2a6a 100644
+--- a/drivers/net/ethernet/intel/ice/ice_base.c
++++ b/drivers/net/ethernet/intel/ice/ice_base.c
+@@ -860,6 +860,9 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+ }
+ rx_rings_rem -= rx_rings_per_v;
+ }
++
++ if (ice_is_xdp_ena_vsi(vsi))
++ ice_map_xdp_rings(vsi);
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 5de7c50b439e1..acf732ce04ed6 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -2323,13 +2323,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
+ if (ret)
+ goto unroll_vector_base;
+
+- ice_vsi_map_rings_to_vectors(vsi);
+-
+- /* Associate q_vector rings to napi */
+- ice_vsi_set_napi_queues(vsi);
+-
+- vsi->stat_offsets_loaded = false;
+-
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ ret = ice_vsi_determine_xdp_res(vsi);
+ if (ret)
+@@ -2340,6 +2333,13 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
+ goto unroll_vector_base;
+ }
+
++ ice_vsi_map_rings_to_vectors(vsi);
++
++ /* Associate q_vector rings to napi */
++ ice_vsi_set_napi_queues(vsi);
++
++ vsi->stat_offsets_loaded = false;
++
+ /* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+ if (vsi->type != ICE_VSI_CTRL)
+ /* Do not exit if configuring RSS had an issue, at
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index b53fe27dbed7d..10fef2e726b39 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -2670,6 +2670,60 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+ bpf_prog_put(old_prog);
+ }
+
++static struct ice_tx_ring *ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid)
++{
++ struct ice_q_vector *q_vector;
++ struct ice_tx_ring *ring;
++
++ if (static_key_enabled(&ice_xdp_locking_key))
++ return vsi->xdp_rings[qid % vsi->num_xdp_txq];
++
++ q_vector = vsi->rx_rings[qid]->q_vector;
++ ice_for_each_tx_ring(ring, q_vector->tx)
++ if (ice_ring_is_xdp(ring))
++ return ring;
++
++ return NULL;
++}
++
++/**
++ * ice_map_xdp_rings - Map XDP rings to interrupt vectors
++ * @vsi: the VSI with XDP rings being configured
++ *
++ * Map XDP rings to interrupt vectors and perform the configuration steps
++ * dependent on the mapping.
++ */
++void ice_map_xdp_rings(struct ice_vsi *vsi)
++{
++ int xdp_rings_rem = vsi->num_xdp_txq;
++ int v_idx, q_idx;
++
++ /* follow the logic from ice_vsi_map_rings_to_vectors */
++ ice_for_each_q_vector(vsi, v_idx) {
++ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
++ int xdp_rings_per_v, q_id, q_base;
++
++ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
++ vsi->num_q_vectors - v_idx);
++ q_base = vsi->num_xdp_txq - xdp_rings_rem;
++
++ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
++ struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
++
++ xdp_ring->q_vector = q_vector;
++ xdp_ring->next = q_vector->tx.tx_ring;
++ q_vector->tx.tx_ring = xdp_ring;
++ }
++ xdp_rings_rem -= xdp_rings_per_v;
++ }
++
++ ice_for_each_rxq(vsi, q_idx) {
++ vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi,
++ q_idx);
++ ice_tx_xsk_pool(vsi, q_idx);
++ }
++}
++
+ /**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+@@ -2682,7 +2736,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ enum ice_xdp_cfg cfg_type)
+ {
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+- int xdp_rings_rem = vsi->num_xdp_txq;
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg xdp_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+@@ -2695,8 +2748,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ struct device *dev;
+- int i, v_idx;
+- int status;
++ int status, i;
+
+ dev = ice_pf_to_dev(pf);
+ vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+@@ -2715,42 +2767,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ if (ice_xdp_alloc_setup_rings(vsi))
+ goto clear_xdp_rings;
+
+- /* follow the logic from ice_vsi_map_rings_to_vectors */
+- ice_for_each_q_vector(vsi, v_idx) {
+- struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+- int xdp_rings_per_v, q_id, q_base;
+-
+- xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+- vsi->num_q_vectors - v_idx);
+- q_base = vsi->num_xdp_txq - xdp_rings_rem;
+-
+- for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+- struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
+-
+- xdp_ring->q_vector = q_vector;
+- xdp_ring->next = q_vector->tx.tx_ring;
+- q_vector->tx.tx_ring = xdp_ring;
+- }
+- xdp_rings_rem -= xdp_rings_per_v;
+- }
+-
+- ice_for_each_rxq(vsi, i) {
+- if (static_key_enabled(&ice_xdp_locking_key)) {
+- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+- } else {
+- struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+- struct ice_tx_ring *ring;
+-
+- ice_for_each_tx_ring(ring, q_vector->tx) {
+- if (ice_ring_is_xdp(ring)) {
+- vsi->rx_rings[i]->xdp_ring = ring;
+- break;
+- }
+- }
+- }
+- ice_tx_xsk_pool(vsi, i);
+- }
+-
+ /* omit the scheduler update if in reset path; XDP queues will be
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+@@ -2758,6 +2774,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+ if (cfg_type == ICE_XDP_CFG_PART)
+ return 0;
+
++ ice_map_xdp_rings(vsi);
++
+ /* tell the Tx scheduler that right now we have
+ * additional queues
+ */
+--
+2.43.0
+
--- /dev/null
+From 27deb621c3bdc4de69213e3bd52baa80dc21628e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:32 -0700
+Subject: ice: remove af_xdp_zc_qps bitmap
+
+From: Larysa Zaremba <larysa.zaremba@intel.com>
+
+[ Upstream commit adbf5a42341f6ea038d3626cd4437d9f0ad0b2dd ]
+
+Referenced commit has introduced a bitmap to distinguish between ZC and
+copy-mode AF_XDP queues, because xsk_get_pool_from_qid() does not do this
+for us.
+
+The bitmap would be especially useful when restoring previous state after
+rebuild, if only it was not reallocated in the process. This leads to e.g.
+xdpsock dying after changing number of queues.
+
+Instead of preserving the bitmap during the rebuild, remove it completely
+and distinguish between ZC and copy-mode queues based on the presence of
+a device associated with the pool.
+
+Fixes: e102db780e1c ("ice: track AF_XDP ZC enabled queues in bitmap")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-3-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice.h | 32 ++++++++++++++++--------
+ drivers/net/ethernet/intel/ice/ice_lib.c | 8 ------
+ drivers/net/ethernet/intel/ice/ice_xsk.c | 13 +++++-----
+ 3 files changed, 27 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
+index 365c03d1c4622..ca9b56d625841 100644
+--- a/drivers/net/ethernet/intel/ice/ice.h
++++ b/drivers/net/ethernet/intel/ice/ice.h
+@@ -412,7 +412,6 @@ struct ice_vsi {
+ struct ice_tc_cfg tc_cfg;
+ struct bpf_prog *xdp_prog;
+ struct ice_tx_ring **xdp_rings; /* XDP ring array */
+- unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
+ u16 num_xdp_txq; /* Used XDP queues */
+ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+
+@@ -748,6 +747,25 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
+ ring->flags |= ICE_TX_FLAGS_RING_XDP;
+ }
+
++/**
++ * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID
++ * @vsi: pointer to VSI
++ * @qid: index of a queue to look at XSK buff pool presence
++ *
++ * Return: A pointer to xsk_buff_pool structure if there is a buffer pool
++ * attached and configured as zero-copy, NULL otherwise.
++ */
++static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi,
++ u16 qid)
++{
++ struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
++
++ if (!ice_is_xdp_ena_vsi(vsi))
++ return NULL;
++
++ return (pool && pool->dev) ? pool : NULL;
++}
++
+ /**
+ * ice_xsk_pool - get XSK buffer pool bound to a ring
+ * @ring: Rx ring to use
+@@ -760,10 +778,7 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
+ struct ice_vsi *vsi = ring->vsi;
+ u16 qid = ring->q_index;
+
+- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
+- return NULL;
+-
+- return xsk_get_pool_from_qid(vsi->netdev, qid);
++ return ice_get_xp_from_qid(vsi, qid);
+ }
+
+ /**
+@@ -788,12 +803,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
+ if (!ring)
+ return;
+
+- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+- ring->xsk_pool = NULL;
+- return;
+- }
+-
+- ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
++ ring->xsk_pool = ice_get_xp_from_qid(vsi, qid);
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 558422120312b..7d401a4dc4513 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -117,14 +117,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+ if (!vsi->q_vectors)
+ goto err_vectors;
+
+- vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
+- if (!vsi->af_xdp_zc_qps)
+- goto err_zc_qps;
+-
+ return 0;
+
+-err_zc_qps:
+- devm_kfree(dev, vsi->q_vectors);
+ err_vectors:
+ devm_kfree(dev, vsi->rxq_map);
+ err_rxq_map:
+@@ -328,8 +322,6 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
+
+ dev = ice_pf_to_dev(pf);
+
+- bitmap_free(vsi->af_xdp_zc_qps);
+- vsi->af_xdp_zc_qps = NULL;
+ /* free the ring and vector containers */
+ devm_kfree(dev, vsi->q_vectors);
+ vsi->q_vectors = NULL;
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 1857220d27fee..86a865788e345 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -269,7 +269,6 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
+ if (!pool)
+ return -EINVAL;
+
+- clear_bit(qid, vsi->af_xdp_zc_qps);
+ xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
+
+ return 0;
+@@ -300,8 +299,6 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+ if (err)
+ return err;
+
+- set_bit(qid, vsi->af_xdp_zc_qps);
+-
+ return 0;
+ }
+
+@@ -349,11 +346,13 @@ ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
+ int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
+ {
+ struct ice_rx_ring *rx_ring;
+- unsigned long q;
++ uint i;
++
++ ice_for_each_rxq(vsi, i) {
++ rx_ring = vsi->rx_rings[i];
++ if (!rx_ring->xsk_pool)
++ continue;
+
+- for_each_set_bit(q, vsi->af_xdp_zc_qps,
+- max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
+- rx_ring = vsi->rx_rings[q];
+ if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
+ return -ENOMEM;
+ }
+--
+2.43.0
+
--- /dev/null
+From efad0a522eb2397015b43fb82a02e6b5ffa2a8c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 14:42:35 -0700
+Subject: igc: Fix Energy Efficient Ethernet support declaration
+
+From: Sasha Neftin <sasha.neftin@intel.com>
+
+[ Upstream commit 7d67d11fbe194f71298263f48e33ae2afa38197e ]
+
+The commit 01cf893bf0f4 ("net: intel: i40e/igc: Remove setting Autoneg in
+EEE capabilities") removed SUPPORTED_Autoneg field but left inappropriate
+ethtool_keee structure initialization. When "ethtool --show <device>"
+(get_eee) invoke, the 'ethtool_keee' structure was accidentally overridden.
+Remove the 'ethtool_keee' overriding and add EEE declaration as per IEEE
+specification that allows reporting Energy Efficient Ethernet capabilities.
+
+Examples:
+Before fix:
+ethtool --show-eee enp174s0
+EEE settings for enp174s0:
+ EEE status: not supported
+
+After fix:
+EEE settings for enp174s0:
+ EEE status: disabled
+ Tx LPI: disabled
+ Supported EEE link modes: 100baseT/Full
+ 1000baseT/Full
+ 2500baseT/Full
+
+Fixes: 01cf893bf0f4 ("net: intel: i40e/igc: Remove setting Autoneg in EEE capabilities")
+Suggested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
+Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-6-e3563aa89b0c@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ethtool.c | 9 +++++++--
+ drivers/net/ethernet/intel/igc/igc_main.c | 4 ++++
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+index 1a64f1ca6ca86..e699412d22f68 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
+@@ -1629,12 +1629,17 @@ static int igc_ethtool_get_eee(struct net_device *netdev,
+ struct igc_hw *hw = &adapter->hw;
+ u32 eeer;
+
++ linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
++ edata->supported);
++ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
++ edata->supported);
++ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
++ edata->supported);
++
+ if (hw->dev_spec._base.eee_enable)
+ mii_eee_cap1_mod_linkmode_t(edata->advertised,
+ adapter->eee_advert);
+
+- *edata = adapter->eee;
+-
+ eeer = rd32(IGC_EEER);
+
+ /* EEE status on negotiated link */
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 4d975d620a8e4..58bc96021bb4c 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -12,6 +12,7 @@
+ #include <linux/bpf_trace.h>
+ #include <net/xdp_sock_drv.h>
+ #include <linux/pci.h>
++#include <linux/mdio.h>
+
+ #include <net/ipv6.h>
+
+@@ -4876,6 +4877,9 @@ void igc_up(struct igc_adapter *adapter)
+ /* start the watchdog. */
+ hw->mac.get_link_status = true;
+ schedule_work(&adapter->watchdog_task);
++
++ adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T |
++ MDIO_EEE_2_5GT;
+ }
+
+ /**
+--
+2.43.0
+
--- /dev/null
+From 3fe8b50c2e7bcbba07f2dab179e4b5e671ee9d18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 04:57:55 +0000
+Subject: ionic: fix kernel panic in XDP_TX action
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 491aee894a08bc9b8bb52e7363b9d4bc6403f363 ]
+
+In the XDP_TX path, ionic driver sends a packet to the TX path with rx
+page and corresponding dma address.
+After tx is done, ionic_tx_clean() frees that page.
+But RX ring buffer isn't reset to NULL.
+So, it uses a freed page, which causes kernel panic.
+
+BUG: unable to handle page fault for address: ffff8881576c110c
+PGD 773801067 P4D 773801067 PUD 87f086067 PMD 87efca067 PTE 800ffffea893e060
+Oops: Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI
+CPU: 1 PID: 25 Comm: ksoftirqd/1 Not tainted 6.9.0+ #11
+Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021
+RIP: 0010:bpf_prog_f0b8caeac1068a55_balancer_ingress+0x3b/0x44f
+Code: 00 53 41 55 41 56 41 57 b8 01 00 00 00 48 8b 5f 08 4c 8b 77 00 4c 89 f7 48 83 c7 0e 48 39 d8
+RSP: 0018:ffff888104e6fa28 EFLAGS: 00010283
+RAX: 0000000000000002 RBX: ffff8881576c1140 RCX: 0000000000000002
+RDX: ffffffffc0051f64 RSI: ffffc90002d33048 RDI: ffff8881576c110e
+RBP: ffff888104e6fa88 R08: 0000000000000000 R09: ffffed1027a04a23
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881b03a21a8
+R13: ffff8881589f800f R14: ffff8881576c1100 R15: 00000001576c1100
+FS: 0000000000000000(0000) GS:ffff88881ae00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff8881576c110c CR3: 0000000767a90000 CR4: 00000000007506f0
+PKRU: 55555554
+Call Trace:
+<TASK>
+? __die+0x20/0x70
+? page_fault_oops+0x254/0x790
+? __pfx_page_fault_oops+0x10/0x10
+? __pfx_is_prefetch.constprop.0+0x10/0x10
+? search_bpf_extables+0x165/0x260
+? fixup_exception+0x4a/0x970
+? exc_page_fault+0xcb/0xe0
+? asm_exc_page_fault+0x22/0x30
+? 0xffffffffc0051f64
+? bpf_prog_f0b8caeac1068a55_balancer_ingress+0x3b/0x44f
+? do_raw_spin_unlock+0x54/0x220
+ionic_rx_service+0x11ab/0x3010 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+? ionic_tx_clean+0x29b/0xc60 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+? __pfx_ionic_tx_clean+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+? __pfx_ionic_rx_service+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+? ionic_tx_cq_service+0x25d/0xa00 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+? __pfx_ionic_rx_service+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+ionic_cq_service+0x69/0x150 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+ionic_txrx_napi+0x11a/0x540 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
+__napi_poll.constprop.0+0xa0/0x440
+net_rx_action+0x7e7/0xc30
+? __pfx_net_rx_action+0x10/0x10
+
+Fixes: 8eeed8373e1c ("ionic: Add XDP_TX support")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Brett Creeley <brett.creeley@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+index 5dba6d2d633cb..2427610f4306d 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+@@ -586,6 +586,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
+ netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err);
+ goto out_xdp_abort;
+ }
++ buf_info->page = NULL;
+ stats->xdp_tx++;
+
+ /* the Tx completion will free the buffers */
+--
+2.43.0
+
--- /dev/null
+From 207f1436b6f66887f65b1f79ea5e7290f6932452 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 19:35:49 +0000
+Subject: ipv6: fix possible race in __fib6_drop_pcpu_from()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b01e1c030770ff3b4fe37fc7cc6bca03f594133f ]
+
+syzbot found a race in __fib6_drop_pcpu_from() [1]
+
+If compiler reads more than once (*ppcpu_rt),
+second read could read NULL, if another cpu clears
+the value in rt6_get_pcpu_route().
+
+Add a READ_ONCE() to prevent this race.
+
+Also add rcu_read_lock()/rcu_read_unlock() because
+we rely on RCU protection while dereferencing pcpu_rt.
+
+[1]
+
+Oops: general protection fault, probably for non-canonical address 0xdffffc0000000012: 0000 [#1] PREEMPT SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097]
+CPU: 0 PID: 7543 Comm: kworker/u8:17 Not tainted 6.10.0-rc1-syzkaller-00013-g2bfcfd584ff5 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
+Workqueue: netns cleanup_net
+ RIP: 0010:__fib6_drop_pcpu_from.part.0+0x10a/0x370 net/ipv6/ip6_fib.c:984
+Code: f8 48 c1 e8 03 80 3c 28 00 0f 85 16 02 00 00 4d 8b 3f 4d 85 ff 74 31 e8 74 a7 fa f7 49 8d bf 90 00 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 0f 85 1e 02 00 00 49 8b 87 90 00 00 00 48 8b 0c 24 48
+RSP: 0018:ffffc900040df070 EFLAGS: 00010206
+RAX: 0000000000000012 RBX: 0000000000000001 RCX: ffffffff89932e16
+RDX: ffff888049dd1e00 RSI: ffffffff89932d7c RDI: 0000000000000091
+RBP: dffffc0000000000 R08: 0000000000000005 R09: 0000000000000007
+R10: 0000000000000001 R11: 0000000000000006 R12: ffff88807fa080b8
+R13: fffffbfff1a9a07d R14: ffffed100ff41022 R15: 0000000000000001
+FS: 0000000000000000(0000) GS:ffff8880b9200000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000001b32c26000 CR3: 000000005d56e000 CR4: 00000000003526f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ __fib6_drop_pcpu_from net/ipv6/ip6_fib.c:966 [inline]
+ fib6_drop_pcpu_from net/ipv6/ip6_fib.c:1027 [inline]
+ fib6_purge_rt+0x7f2/0x9f0 net/ipv6/ip6_fib.c:1038
+ fib6_del_route net/ipv6/ip6_fib.c:1998 [inline]
+ fib6_del+0xa70/0x17b0 net/ipv6/ip6_fib.c:2043
+ fib6_clean_node+0x426/0x5b0 net/ipv6/ip6_fib.c:2205
+ fib6_walk_continue+0x44f/0x8d0 net/ipv6/ip6_fib.c:2127
+ fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2175
+ fib6_clean_tree+0xd7/0x120 net/ipv6/ip6_fib.c:2255
+ __fib6_clean_all+0x100/0x2d0 net/ipv6/ip6_fib.c:2271
+ rt6_sync_down_dev net/ipv6/route.c:4906 [inline]
+ rt6_disable_ip+0x7ed/0xa00 net/ipv6/route.c:4911
+ addrconf_ifdown.isra.0+0x117/0x1b40 net/ipv6/addrconf.c:3855
+ addrconf_notify+0x223/0x19e0 net/ipv6/addrconf.c:3778
+ notifier_call_chain+0xb9/0x410 kernel/notifier.c:93
+ call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:1992
+ call_netdevice_notifiers_extack net/core/dev.c:2030 [inline]
+ call_netdevice_notifiers net/core/dev.c:2044 [inline]
+ dev_close_many+0x333/0x6a0 net/core/dev.c:1585
+ unregister_netdevice_many_notify+0x46d/0x19f0 net/core/dev.c:11193
+ unregister_netdevice_many net/core/dev.c:11276 [inline]
+ default_device_exit_batch+0x85b/0xae0 net/core/dev.c:11759
+ ops_exit_list+0x128/0x180 net/core/net_namespace.c:178
+ cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640
+ process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231
+ process_scheduled_works kernel/workqueue.c:3312 [inline]
+ worker_thread+0x6c8/0xf70 kernel/workqueue.c:3393
+ kthread+0x2c1/0x3a0 kernel/kthread.c:389
+ ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+
+Fixes: d52d3997f843 ("ipv6: Create percpu rt6_info")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Martin KaFai Lau <kafai@fb.com>
+Link: https://lore.kernel.org/r/20240604193549.981839-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_fib.c | 6 +++++-
+ net/ipv6/route.c | 1 +
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index c1f62352a4814..1ace4ac3ee04c 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -965,6 +965,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ if (!fib6_nh->rt6i_pcpu)
+ return;
+
++ rcu_read_lock();
+ /* release the reference to this fib entry from
+ * all of its cached pcpu routes
+ */
+@@ -973,7 +974,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+- pcpu_rt = *ppcpu_rt;
++
++ /* Paired with xchg() in rt6_get_pcpu_route() */
++ pcpu_rt = READ_ONCE(*ppcpu_rt);
+
+ /* only dropping the 'from' reference if the cached route
+ * is using 'match'. The cached pcpu_rt->from only changes
+@@ -987,6 +990,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ fib6_info_release(from);
+ }
+ }
++ rcu_read_unlock();
+ }
+
+ struct fib6_nh_pcpu_arg {
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index f090e7bcb784f..bca6f33c7bb9e 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1409,6 +1409,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
+ struct rt6_info *prev, **p;
+
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
++ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
+ prev = xchg(p, NULL);
+ if (prev) {
+ dst_dev_put(&prev->dst);
+--
+2.43.0
+
--- /dev/null
+From 6eb925c6292ada81ade142facd48e8afac1a12f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 13:26:32 +0000
+Subject: ipv6: ioam: block BH from ioam6_output()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2fe40483ec257de2a0d819ef88e3e76c7e261319 ]
+
+As explained in commit 1378817486d6 ("tipc: block BH
+before using dst_cache"), net/core/dst_cache.c
+helpers need to be called with BH disabled.
+
+Disabling preemption in ioam6_output() is not good enough,
+because ioam6_output() is called from process context,
+lwtunnel_output() only uses rcu_read_lock().
+
+We might be interrupted by a softirq, re-enter ioam6_output()
+and corrupt dst_cache data structures.
+
+Fix the race by using local_bh_disable() instead of
+preempt_disable().
+
+Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Justin Iurman <justin.iurman@uliege.be>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/20240531132636.2637995-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ioam6_iptunnel.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
+index 7563f8c6aa87c..bf7120ecea1eb 100644
+--- a/net/ipv6/ioam6_iptunnel.c
++++ b/net/ipv6/ioam6_iptunnel.c
+@@ -351,9 +351,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ goto drop;
+
+ if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
+- preempt_disable();
++ local_bh_disable();
+ dst = dst_cache_get(&ilwt->cache);
+- preempt_enable();
++ local_bh_enable();
+
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+@@ -373,9 +373,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ goto drop;
+ }
+
+- preempt_disable();
++ local_bh_disable();
+ dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
+- preempt_enable();
++ local_bh_enable();
+ }
+
+ skb_dst_drop(skb);
+--
+2.43.0
+
--- /dev/null
+From a129d96350918ce3e77d0d1986b1456300e7538b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 13:26:34 +0000
+Subject: ipv6: sr: block BH in seg6_output_core() and seg6_input_core()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c0b98ac1cc104f48763cdb27b1e9ac25fd81fc90 ]
+
+As explained in commit 1378817486d6 ("tipc: block BH
+before using dst_cache"), net/core/dst_cache.c
+helpers need to be called with BH disabled.
+
+Disabling preemption in seg6_output_core() is not good enough,
+because seg6_output_core() is called from process context,
+lwtunnel_output() only uses rcu_read_lock().
+
+We might be interrupted by a softirq, re-enter seg6_output_core()
+and corrupt dst_cache data structures.
+
+Fix the race by using local_bh_disable() instead of
+preempt_disable().
+
+Apply a similar change in seg6_input_core().
+
+Fixes: fa79581ea66c ("ipv6: sr: fix several BUGs when preemption is enabled")
+Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: David Lebrun <dlebrun@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://lore.kernel.org/r/20240531132636.2637995-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/seg6_iptunnel.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
+index a75df2ec8db0d..098632adc9b5a 100644
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -464,23 +464,21 @@ static int seg6_input_core(struct net *net, struct sock *sk,
+
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+- preempt_disable();
++ local_bh_disable();
+ dst = dst_cache_get(&slwt->cache);
+- preempt_enable();
+
+ if (!dst) {
+ ip6_route_input(skb);
+ dst = skb_dst(skb);
+ if (!dst->error) {
+- preempt_disable();
+ dst_cache_set_ip6(&slwt->cache, dst,
+ &ipv6_hdr(skb)->saddr);
+- preempt_enable();
+ }
+ } else {
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+ }
++ local_bh_enable();
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+@@ -536,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
+
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+- preempt_disable();
++ local_bh_disable();
+ dst = dst_cache_get(&slwt->cache);
+- preempt_enable();
++ local_bh_enable();
+
+ if (unlikely(!dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+@@ -558,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
+ goto drop;
+ }
+
+- preempt_disable();
++ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+- preempt_enable();
++ local_bh_enable();
+ }
+
+ skb_dst_drop(skb);
+--
+2.43.0
+
--- /dev/null
+From e685f5611df4f81294aafd029551434d10a292fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 04:46:44 +0000
+Subject: KVM: SEV-ES: Delegate LBR virtualization to the processor
+
+From: Ravi Bangoria <ravi.bangoria@amd.com>
+
+[ Upstream commit b7e4be0a224fe5c6be30c1c8bdda8d2317ad6ba4 ]
+
+As documented in APM[1], LBR Virtualization must be enabled for SEV-ES
+guests. Although KVM currently enforces LBRV for SEV-ES guests, there
+are multiple issues with it:
+
+o MSR_IA32_DEBUGCTLMSR is still intercepted. Since MSR_IA32_DEBUGCTLMSR
+ interception is used to dynamically toggle LBRV for performance reasons,
+ this can be fatal for SEV-ES guests. For ex SEV-ES guest on Zen3:
+
+ [guest ~]# wrmsr 0x1d9 0x4
+ KVM: entry failed, hardware error 0xffffffff
+ EAX=00000004 EBX=00000000 ECX=000001d9 EDX=00000000
+
+ Fix this by never intercepting MSR_IA32_DEBUGCTLMSR for SEV-ES guests.
+ No additional save/restore logic is required since MSR_IA32_DEBUGCTLMSR
+ is of swap type A.
+
+o KVM will disable LBRV if userspace sets MSR_IA32_DEBUGCTLMSR before the
+ VMSA is encrypted. Fix this by moving LBRV enablement code post VMSA
+ encryption.
+
+[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
+ 2023, Vol 2, 15.35.2 Enabling SEV-ES.
+ https://bugzilla.kernel.org/attachment.cgi?id=304653
+
+Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
+Co-developed-by: Nikunj A Dadhania <nikunj@amd.com>
+Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
+Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
+Message-ID: <20240531044644.768-4-ravi.bangoria@amd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c | 13 ++++++++-----
+ arch/x86/kvm/svm/svm.c | 8 +++++++-
+ arch/x86/kvm/svm/svm.h | 3 ++-
+ 3 files changed, 17 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 43b7d76a27a56..4471b4e08d23d 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -666,6 +666,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ return ret;
+
+ vcpu->arch.guest_state_protected = true;
++
++ /*
++ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
++ * only after setting guest_state_protected because KVM_SET_MSRS allows
++ * dynamic toggling of LBRV (for performance reason) on write access to
++ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
++ */
++ svm_enable_lbrv(vcpu);
+ return 0;
+ }
+
+@@ -3040,7 +3048,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+ /*
+ * An SEV-ES guest requires a VMSA area that is a separate from the
+@@ -3092,10 +3099,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
+ /* Clear intercepts on selected MSRs */
+ set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+ }
+
+ void sev_init_vmcb(struct vcpu_svm *svm)
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 3363e5ba0fff5..4650153afa465 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
+ { .index = MSR_IA32_SPEC_CTRL, .always = false },
+ { .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_FLUSH_CMD, .always = false },
++ { .index = MSR_IA32_DEBUGCTLMSR, .always = false },
+ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
+ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
+ { .index = MSR_IA32_LASTINTFROMIP, .always = false },
+@@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+ vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+
+-static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
++void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+@@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+
++ if (sev_es_guest(vcpu->kvm))
++ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
++
+ /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+ if (is_guest_mode(vcpu))
+ svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
+@@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
++
+ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index 4bf9af529ae03..2ed3015e03f13 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -30,7 +30,7 @@
+ #define IOPM_SIZE PAGE_SIZE * 3
+ #define MSRPM_SIZE PAGE_SIZE * 2
+
+-#define MAX_DIRECT_ACCESS_MSRS 47
++#define MAX_DIRECT_ACCESS_MSRS 48
+ #define MSRPM_OFFSETS 32
+ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+ extern bool npt_enabled;
+@@ -544,6 +544,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
+ void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
+ void svm_vcpu_free_msrpm(u32 *msrpm);
+ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
++void svm_enable_lbrv(struct kvm_vcpu *vcpu);
+ void svm_update_lbrv(struct kvm_vcpu *vcpu);
+
+ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+--
+2.43.0
+
--- /dev/null
+From f6e2b6111122e9b2d33db9675164c2ccd6d73d83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 04:46:43 +0000
+Subject: KVM: SEV-ES: Disallow SEV-ES guests when X86_FEATURE_LBRV is absent
+
+From: Ravi Bangoria <ravi.bangoria@amd.com>
+
+[ Upstream commit d922056215617eedfbdbc29fe49953423686fe5e ]
+
+As documented in APM[1], LBR Virtualization must be enabled for SEV-ES
+guests. So, prevent SEV-ES guests when LBRV support is missing.
+
+[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
+ 2023, Vol 2, 15.35.2 Enabling SEV-ES.
+ https://bugzilla.kernel.org/attachment.cgi?id=304653
+
+Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
+Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
+Message-ID: <20240531044644.768-3-ravi.bangoria@amd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c | 6 ++++++
+ arch/x86/kvm/svm/svm.c | 16 +++++++---------
+ arch/x86/kvm/svm/svm.h | 1 +
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 759581bb2128d..43b7d76a27a56 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2269,6 +2269,12 @@ void __init sev_hardware_setup(void)
+ if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+ goto out;
+
++ if (!lbrv) {
++ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
++ "LBRV must be present for SEV-ES support");
++ goto out;
++ }
++
+ /* Has the system been allocated ASIDs for SEV-ES? */
+ if (min_sev_asid == 1)
+ goto out;
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 308416b50b036..3363e5ba0fff5 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -215,7 +215,7 @@ int vgif = true;
+ module_param(vgif, int, 0444);
+
+ /* enable/disable LBR virtualization */
+-static int lbrv = true;
++int lbrv = true;
+ module_param(lbrv, int, 0444);
+
+ static int tsc_scaling = true;
+@@ -5260,6 +5260,12 @@ static __init int svm_hardware_setup(void)
+
+ nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+
++ if (lbrv) {
++ if (!boot_cpu_has(X86_FEATURE_LBRV))
++ lbrv = false;
++ else
++ pr_info("LBR virtualization supported\n");
++ }
+ /*
+ * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
+ * may be modified by svm_adjust_mmio_mask()), as well as nrips.
+@@ -5313,14 +5319,6 @@ static __init int svm_hardware_setup(void)
+ svm_x86_ops.set_vnmi_pending = NULL;
+ }
+
+-
+- if (lbrv) {
+- if (!boot_cpu_has(X86_FEATURE_LBRV))
+- lbrv = false;
+- else
+- pr_info("LBR virtualization supported\n");
+- }
+-
+ if (!enable_pmu)
+ pr_info("PMU virtualization is disabled\n");
+
+diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
+index 33878efdebc82..4bf9af529ae03 100644
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -39,6 +39,7 @@ extern int vgif;
+ extern bool intercept_smi;
+ extern bool x2avic_enabled;
+ extern bool vnmi;
++extern int lbrv;
+
+ /*
+ * Clean bits in VMCB.
+--
+2.43.0
+
--- /dev/null
+From 7c14fcf7b04386e758a76bb1a7f213f03c359581 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 01:02:17 +0800
+Subject: mptcp: count CLOSE-WAIT sockets for MPTCP_MIB_CURRESTAB
+
+From: Jason Xing <kernelxing@tencent.com>
+
+[ Upstream commit 9633e9377e6af0244f7381e86b9aac5276f5be97 ]
+
+Like previous patch does in TCP, we need to adhere to RFC 1213:
+
+ "tcpCurrEstab OBJECT-TYPE
+ ...
+ The number of TCP connections for which the current state
+ is either ESTABLISHED or CLOSE- WAIT."
+
+So let's consider CLOSE-WAIT sockets.
+
+The logic of counting
+When we increment the counter?
+a) Only if we change the state to ESTABLISHED.
+
+When we decrement the counter?
+a) if the socket leaves ESTABLISHED and will never go into CLOSE-WAIT,
+say, on the client side, changing from ESTABLISHED to FIN-WAIT-1.
+b) if the socket leaves CLOSE-WAIT, say, on the server side, changing
+from CLOSE-WAIT to LAST-ACK.
+
+Fixes: d9cd27b8cd19 ("mptcp: add CurrEstab MIB counter support")
+Signed-off-by: Jason Xing <kernelxing@tencent.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 965eb69dc5de3..327dcf06edd47 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2907,9 +2907,14 @@ void mptcp_set_state(struct sock *sk, int state)
+ if (oldstate != TCP_ESTABLISHED)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ break;
+-
++ case TCP_CLOSE_WAIT:
++ /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state:
++ * MPTCP "accepted" sockets will be created later on. So no
++ * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT.
++ */
++ break;
+ default:
+- if (oldstate == TCP_ESTABLISHED)
++ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 5ff4c9e607f13ea37ef7584778c25b6d4d06c066 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 21:25:05 +0200
+Subject: net: ethernet: mtk_eth_soc: handle dma buffer size soc specific
+
+From: Frank Wunderlich <frank-w@public-files.de>
+
+[ Upstream commit c57e558194430d10d5e5f4acd8a8655b68dade13 ]
+
+The mainline MTK ethernet driver has long suffered from rare but
+annoying tx queue timeouts. We think that this is caused by fixed
+dma sizes hardcoded for all SoCs.
+
+We suspect this problem arises from a low level of free TX DMADs,
+the TX Ring almost full.
+
+The transmit timeout is caused by the Tx queue not waking up. The
+Tx queue stops when the free counter is less than ring->thres, and
+it will wake up once the free counter is greater than ring->thres.
+If the CPU is too late to wake up the Tx queues, it may cause a
+transmit timeout.
+Therefore, we increased the TX and RX DMADs to improve this error
+situation.
+
+Use the dma-size implementation from SDK in a per SoC manner. In
+difference to SDK we have no RSS feature yet, so all RX/TX sizes
+should be raised from 512 to 2048 byte except fqdma on mt7988 to
+avoid the tx timeout issue.
+
+Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet")
+Suggested-by: Daniel Golle <daniel@makrotopia.org>
+Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 104 +++++++++++++-------
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 9 +-
+ 2 files changed, 77 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index d7d73295f0dc4..41d9b0684be74 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1131,9 +1131,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ {
+ const struct mtk_soc_data *soc = eth->soc;
+ dma_addr_t phy_ring_tail;
+- int cnt = MTK_QDMA_RING_SIZE;
++ int cnt = soc->tx.fq_dma_size;
+ dma_addr_t dma_addr;
+- int i;
++ int i, j, len;
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM))
+ eth->scratch_ring = eth->sram_base;
+@@ -1142,40 +1142,46 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ cnt * soc->tx.desc_size,
+ ð->phy_scratch_ring,
+ GFP_KERNEL);
++
+ if (unlikely(!eth->scratch_ring))
+ return -ENOMEM;
+
+- eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
+- if (unlikely(!eth->scratch_head))
+- return -ENOMEM;
++ phy_ring_tail = eth->phy_scratch_ring + soc->tx.desc_size * (cnt - 1);
+
+- dma_addr = dma_map_single(eth->dma_dev,
+- eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+- DMA_FROM_DEVICE);
+- if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+- return -ENOMEM;
++ for (j = 0; j < DIV_ROUND_UP(soc->tx.fq_dma_size, MTK_FQ_DMA_LENGTH); j++) {
++ len = min_t(int, cnt - j * MTK_FQ_DMA_LENGTH, MTK_FQ_DMA_LENGTH);
++ eth->scratch_head[j] = kcalloc(len, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
+
+- phy_ring_tail = eth->phy_scratch_ring + soc->tx.desc_size * (cnt - 1);
++ if (unlikely(!eth->scratch_head[j]))
++ return -ENOMEM;
+
+- for (i = 0; i < cnt; i++) {
+- dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE;
+- struct mtk_tx_dma_v2 *txd;
++ dma_addr = dma_map_single(eth->dma_dev,
++ eth->scratch_head[j], len * MTK_QDMA_PAGE_SIZE,
++ DMA_FROM_DEVICE);
+
+- txd = eth->scratch_ring + i * soc->tx.desc_size;
+- txd->txd1 = addr;
+- if (i < cnt - 1)
+- txd->txd2 = eth->phy_scratch_ring +
+- (i + 1) * soc->tx.desc_size;
++ if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
++ return -ENOMEM;
+
+- txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
+- if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA))
+- txd->txd3 |= TX_DMA_PREP_ADDR64(addr);
+- txd->txd4 = 0;
+- if (mtk_is_netsys_v2_or_greater(eth)) {
+- txd->txd5 = 0;
+- txd->txd6 = 0;
+- txd->txd7 = 0;
+- txd->txd8 = 0;
++ for (i = 0; i < cnt; i++) {
++ struct mtk_tx_dma_v2 *txd;
++
++ txd = eth->scratch_ring + (j * MTK_FQ_DMA_LENGTH + i) * soc->tx.desc_size;
++ txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE;
++ if (j * MTK_FQ_DMA_LENGTH + i < cnt)
++ txd->txd2 = eth->phy_scratch_ring +
++ (j * MTK_FQ_DMA_LENGTH + i + 1) * soc->tx.desc_size;
++
++ txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
++ if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA))
++ txd->txd3 |= TX_DMA_PREP_ADDR64(dma_addr + i * MTK_QDMA_PAGE_SIZE);
++
++ txd->txd4 = 0;
++ if (mtk_is_netsys_v2_or_greater(eth)) {
++ txd->txd5 = 0;
++ txd->txd6 = 0;
++ txd->txd7 = 0;
++ txd->txd8 = 0;
++ }
+ }
+ }
+
+@@ -2457,7 +2463,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
+ if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
+ ring_size = MTK_QDMA_RING_SIZE;
+ else
+- ring_size = MTK_DMA_SIZE;
++ ring_size = soc->tx.dma_size;
+
+ ring->buf = kcalloc(ring_size, sizeof(*ring->buf),
+ GFP_KERNEL);
+@@ -2465,8 +2471,8 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
+ goto no_tx_mem;
+
+ if (MTK_HAS_CAPS(soc->caps, MTK_SRAM)) {
+- ring->dma = eth->sram_base + ring_size * sz;
+- ring->phys = eth->phy_scratch_ring + ring_size * (dma_addr_t)sz;
++ ring->dma = eth->sram_base + soc->tx.fq_dma_size * sz;
++ ring->phys = eth->phy_scratch_ring + soc->tx.fq_dma_size * (dma_addr_t)sz;
+ } else {
+ ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
+ &ring->phys, GFP_KERNEL);
+@@ -2588,6 +2594,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
+ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ {
+ const struct mtk_reg_map *reg_map = eth->soc->reg_map;
++ const struct mtk_soc_data *soc = eth->soc;
+ struct mtk_rx_ring *ring;
+ int rx_data_len, rx_dma_size, tx_ring_size;
+ int i;
+@@ -2595,7 +2602,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ tx_ring_size = MTK_QDMA_RING_SIZE;
+ else
+- tx_ring_size = MTK_DMA_SIZE;
++ tx_ring_size = soc->tx.dma_size;
+
+ if (rx_flag == MTK_RX_FLAGS_QDMA) {
+ if (ring_no)
+@@ -2610,7 +2617,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ rx_dma_size = MTK_HW_LRO_DMA_SIZE;
+ } else {
+ rx_data_len = ETH_DATA_LEN;
+- rx_dma_size = MTK_DMA_SIZE;
++ rx_dma_size = soc->rx.dma_size;
+ }
+
+ ring->frag_size = mtk_max_frag_size(rx_data_len);
+@@ -3139,7 +3146,10 @@ static void mtk_dma_free(struct mtk_eth *eth)
+ mtk_rx_clean(eth, ð->rx_ring[i], false);
+ }
+
+- kfree(eth->scratch_head);
++ for (i = 0; i < DIV_ROUND_UP(soc->tx.fq_dma_size, MTK_FQ_DMA_LENGTH); i++) {
++ kfree(eth->scratch_head[i]);
++ eth->scratch_head[i] = NULL;
++ }
+ }
+
+ static bool mtk_hw_reset_check(struct mtk_eth *eth)
+@@ -5043,11 +5053,14 @@ static const struct mtk_soc_data mt2701_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+ .irq_done_mask = MTK_RX_DONE_INT,
+ .dma_l4_valid = RX_DMA_L4_VALID,
++ .dma_size = MTK_DMA_SIZE(2K),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
+ },
+@@ -5067,11 +5080,14 @@ static const struct mtk_soc_data mt7621_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+ .irq_done_mask = MTK_RX_DONE_INT,
+ .dma_l4_valid = RX_DMA_L4_VALID,
++ .dma_size = MTK_DMA_SIZE(2K),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
+ },
+@@ -5093,11 +5109,14 @@ static const struct mtk_soc_data mt7622_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+ .irq_done_mask = MTK_RX_DONE_INT,
+ .dma_l4_valid = RX_DMA_L4_VALID,
++ .dma_size = MTK_DMA_SIZE(2K),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
+ },
+@@ -5118,11 +5137,14 @@ static const struct mtk_soc_data mt7623_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+ .irq_done_mask = MTK_RX_DONE_INT,
+ .dma_l4_valid = RX_DMA_L4_VALID,
++ .dma_size = MTK_DMA_SIZE(2K),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
+ },
+@@ -5141,11 +5163,14 @@ static const struct mtk_soc_data mt7629_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+ .irq_done_mask = MTK_RX_DONE_INT,
+ .dma_l4_valid = RX_DMA_L4_VALID,
++ .dma_size = MTK_DMA_SIZE(2K),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
+ },
+@@ -5167,6 +5192,8 @@ static const struct mtk_soc_data mt7981_data = {
+ .desc_size = sizeof(struct mtk_tx_dma_v2),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
+ .dma_len_offset = 8,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+@@ -5174,6 +5201,7 @@ static const struct mtk_soc_data mt7981_data = {
+ .dma_l4_valid = RX_DMA_L4_VALID_V2,
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
+ },
+ };
+
+@@ -5193,6 +5221,8 @@ static const struct mtk_soc_data mt7986_data = {
+ .desc_size = sizeof(struct mtk_tx_dma_v2),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
+ .dma_len_offset = 8,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+@@ -5200,6 +5230,7 @@ static const struct mtk_soc_data mt7986_data = {
+ .dma_l4_valid = RX_DMA_L4_VALID_V2,
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
+ },
+ };
+
+@@ -5219,6 +5250,8 @@ static const struct mtk_soc_data mt7988_data = {
+ .desc_size = sizeof(struct mtk_tx_dma_v2),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
+ .dma_len_offset = 8,
++ .dma_size = MTK_DMA_SIZE(2K),
++ .fq_dma_size = MTK_DMA_SIZE(4K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma_v2),
+@@ -5226,6 +5259,7 @@ static const struct mtk_soc_data mt7988_data = {
+ .dma_l4_valid = RX_DMA_L4_VALID_V2,
+ .dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
+ .dma_len_offset = 8,
++ .dma_size = MTK_DMA_SIZE(2K),
+ },
+ };
+
+@@ -5240,6 +5274,7 @@ static const struct mtk_soc_data rt5350_data = {
+ .desc_size = sizeof(struct mtk_tx_dma),
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
+ },
+ .rx = {
+ .desc_size = sizeof(struct mtk_rx_dma),
+@@ -5247,6 +5282,7 @@ static const struct mtk_soc_data rt5350_data = {
+ .dma_l4_valid = RX_DMA_L4_VALID_PDMA,
+ .dma_max_len = MTK_TX_DMA_BUF_LEN,
+ .dma_len_offset = 16,
++ .dma_size = MTK_DMA_SIZE(2K),
+ },
+ };
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index 39b50de1decbf..a25c33b9a4f34 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -32,7 +32,9 @@
+ #define MTK_TX_DMA_BUF_LEN 0x3fff
+ #define MTK_TX_DMA_BUF_LEN_V2 0xffff
+ #define MTK_QDMA_RING_SIZE 2048
+-#define MTK_DMA_SIZE 512
++#define MTK_DMA_SIZE(x) (SZ_##x)
++#define MTK_FQ_DMA_HEAD 32
++#define MTK_FQ_DMA_LENGTH 2048
+ #define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
+ #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
+ #define MTK_DMA_DUMMY_DESC 0xffffffff
+@@ -1176,6 +1178,8 @@ struct mtk_soc_data {
+ u32 desc_size;
+ u32 dma_max_len;
+ u32 dma_len_offset;
++ u32 dma_size;
++ u32 fq_dma_size;
+ } tx;
+ struct {
+ u32 desc_size;
+@@ -1183,6 +1187,7 @@ struct mtk_soc_data {
+ u32 dma_l4_valid;
+ u32 dma_max_len;
+ u32 dma_len_offset;
++ u32 dma_size;
+ } rx;
+ };
+
+@@ -1264,7 +1269,7 @@ struct mtk_eth {
+ struct napi_struct rx_napi;
+ void *scratch_ring;
+ dma_addr_t phy_scratch_ring;
+- void *scratch_head;
++ void *scratch_head[MTK_FQ_DMA_HEAD];
+ struct clk *clks[MTK_CLK_MAX];
+
+ struct mii_bus *mii_bus;
+--
+2.43.0
+
--- /dev/null
+From f92f6f19c890a216c57262bfcb522089b85ddd4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jun 2024 11:47:43 +0800
+Subject: net: ethtool: fix the error condition in
+ ethtool_get_phy_stats_ethtool()
+
+From: Su Hui <suhui@nfschina.com>
+
+[ Upstream commit 0dcc53abf58d572d34c5313de85f607cd33fc691 ]
+
+Clang static checker (scan-build) warning:
+net/ethtool/ioctl.c:line 2233, column 2
+Called function pointer is null (null dereference).
+
+Return '-EOPNOTSUPP' when 'ops->get_ethtool_phy_stats' is NULL to fix
+this typo error.
+
+Fixes: 201ed315f967 ("net/ethtool/ioctl: split ethtool_get_phy_stats into multiple helpers")
+Signed-off-by: Su Hui <suhui@nfschina.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Hariprasad Kelam <hkelam@marvell.com>
+Link: https://lore.kernel.org/r/20240605034742.921751-1-suhui@nfschina.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/ioctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
+index 5a55270aa86e8..e645d751a5e89 100644
+--- a/net/ethtool/ioctl.c
++++ b/net/ethtool/ioctl.c
+@@ -2220,7 +2220,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ int n_stats, ret;
+
+- if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
++ if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats)
+ return -EOPNOTSUPP;
+
+ n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+--
+2.43.0
+
--- /dev/null
+From 8f920f6e9e1bbb8cd79a42135d7126f65de76f8d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 00:04:43 +0300
+Subject: net/mlx5: Always stop health timer during driver removal
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit c8b3f38d2dae0397944814d691a419c451f9906f ]
+
+Currently, if teardown_hca fails to execute during driver removal, mlx5
+does not stop the health timer. Afterwards, mlx5 continues with driver
+teardown. This may lead to a UAF bug, which results in page fault
+Oops[1], since the health timer invokes after resources were freed.
+
+Hence, stop the health monitor even if teardown_hca fails.
+
+[1]
+mlx5_core 0000:18:00.0: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
+mlx5_core 0000:18:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
+mlx5_core 0000:18:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
+mlx5_core 0000:18:00.0: E-Switch: cleanup
+mlx5_core 0000:18:00.0: wait_func:1155:(pid 1967079): TEARDOWN_HCA(0x103) timeout. Will cause a leak of a command resource
+mlx5_core 0000:18:00.0: mlx5_function_close:1288:(pid 1967079): tear_down_hca failed, skip cleanup
+BUG: unable to handle page fault for address: ffffa26487064230
+PGD 100c00067 P4D 100c00067 PUD 100e5a067 PMD 105ed7067 PTE 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 0 PID: 0 Comm: swapper/0 Tainted: G OE ------- --- 6.7.0-68.fc38.x86_64 #1
+Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020
+RIP: 0010:ioread32be+0x34/0x60
+RSP: 0018:ffffa26480003e58 EFLAGS: 00010292
+RAX: ffffa26487064200 RBX: ffff9042d08161a0 RCX: ffff904c108222c0
+RDX: 000000010bbf1b80 RSI: ffffffffc055ddb0 RDI: ffffa26487064230
+RBP: ffff9042d08161a0 R08: 0000000000000022 R09: ffff904c108222e8
+R10: 0000000000000004 R11: 0000000000000441 R12: ffffffffc055ddb0
+R13: ffffa26487064200 R14: ffffa26480003f00 R15: ffff904c108222c0
+FS: 0000000000000000(0000) GS:ffff904c10800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffa26487064230 CR3: 00000002c4420006 CR4: 00000000007706f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <IRQ>
+ ? __die+0x23/0x70
+ ? page_fault_oops+0x171/0x4e0
+ ? exc_page_fault+0x175/0x180
+ ? asm_exc_page_fault+0x26/0x30
+ ? __pfx_poll_health+0x10/0x10 [mlx5_core]
+ ? __pfx_poll_health+0x10/0x10 [mlx5_core]
+ ? ioread32be+0x34/0x60
+ mlx5_health_check_fatal_sensors+0x20/0x100 [mlx5_core]
+ ? __pfx_poll_health+0x10/0x10 [mlx5_core]
+ poll_health+0x42/0x230 [mlx5_core]
+ ? __next_timer_interrupt+0xbc/0x110
+ ? __pfx_poll_health+0x10/0x10 [mlx5_core]
+ call_timer_fn+0x21/0x130
+ ? __pfx_poll_health+0x10/0x10 [mlx5_core]
+ __run_timers+0x222/0x2c0
+ run_timer_softirq+0x1d/0x40
+ __do_softirq+0xc9/0x2c8
+ __irq_exit_rcu+0xa6/0xc0
+ sysvec_apic_timer_interrupt+0x72/0x90
+ </IRQ>
+ <TASK>
+ asm_sysvec_apic_timer_interrupt+0x1a/0x20
+RIP: 0010:cpuidle_enter_state+0xcc/0x440
+ ? cpuidle_enter_state+0xbd/0x440
+ cpuidle_enter+0x2d/0x40
+ do_idle+0x20d/0x270
+ cpu_startup_entry+0x2a/0x30
+ rest_init+0xd0/0xd0
+ arch_call_rest_init+0xe/0x30
+ start_kernel+0x709/0xa90
+ x86_64_start_reservations+0x18/0x30
+ x86_64_start_kernel+0x96/0xa0
+ secondary_startup_64_no_verify+0x18f/0x19b
+---[ end trace 0000000000000000 ]---
+
+Fixes: 9b98d395b85d ("net/mlx5: Start health poll at earlier stage of driver load")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 6574c145dc1e2..459a836a5d9c1 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1298,6 +1298,9 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
+
+ if (!err)
+ mlx5_function_disable(dev, boot);
++ else
++ mlx5_stop_health_poll(dev, boot);
++
+ return err;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 3e7e69e895c5ce90f88011c9e2aef9d60f4f922d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 13:05:52 +0300
+Subject: net/mlx5: Fix tainted pointer delete is case of flow rules creation
+ fail
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit 229bedbf62b13af5aba6525ad10b62ad38d9ccb5 ]
+
+In case of flow rule creation failure in mlx5_lag_create_port_sel_table(),
+instead of previously created rules, the tainted pointer is deleted
+deveral times.
+Fix this bug by using correct flow rules pointers.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode")
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240604100552.25201-1-amishin@t-argos.ru
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+index 101b3bb908638..e12bc4cd80661 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+@@ -88,9 +88,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
+ &dest, 1);
+ if (IS_ERR(lag_definer->rules[idx])) {
+ err = PTR_ERR(lag_definer->rules[idx]);
+- while (i--)
+- while (j--)
++ do {
++ while (j--) {
++ idx = i * ldev->buckets + j;
+ mlx5_del_flow_rules(lag_definer->rules[idx]);
++ }
++ j = ldev->buckets;
++ } while (i--);
+ goto destroy_fg;
+ }
+ }
+--
+2.43.0
+
--- /dev/null
+From 4bfecfe30b552d640ec75dc5e5ad070a35ea545e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 00:04:42 +0300
+Subject: net/mlx5: Stop waiting for PCI if pci channel is offline
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit 33afbfcc105a572159750f2ebee834a8a70fdd96 ]
+
+In case pci channel becomes offline the driver should not wait for PCI
+reads during health dump and recovery flow. The driver has timeout for
+each of these loops trying to read PCI, so it would fail anyway.
+However, in case of recovery waiting till timeout may cause the pci
+error_detected() callback fail to meet pci_dpc_recovered() wait timeout.
+
+Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Shay Drori <shayd@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fw.c | 4 ++++
+ drivers/net/ethernet/mellanox/mlx5/core/health.c | 8 ++++++++
+ drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 4 ++++
+ 3 files changed, 16 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+index e7faf7e73ca48..6c7f2471fe629 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+@@ -373,6 +373,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
+ break;
++ if (pci_channel_offline(dev->pdev)) {
++ mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
++ return -EACCES;
++ }
+
+ cond_resched();
+ } while (!time_after(jiffies, end));
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+index ad38e31822df1..a6329ca2d9bff 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -248,6 +248,10 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
+ break;
++ if (pci_channel_offline(dev->pdev)) {
++ mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
++ goto unlock;
++ }
+
+ msleep(20);
+ } while (!time_after(jiffies, end));
+@@ -317,6 +321,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
+ mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
+ return -ENODEV;
+ }
++ if (pci_channel_offline(dev->pdev)) {
++ mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n");
++ return -EACCES;
++ }
+ msleep(100);
+ }
+ return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
+index 6b774e0c27665..d0b595ba61101 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
+@@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
+ ret = -EBUSY;
+ goto pci_unlock;
+ }
++ if (pci_channel_offline(dev->pdev)) {
++ ret = -EACCES;
++ goto pci_unlock;
++ }
+
+ /* Check if semaphore is already locked */
+ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
+--
+2.43.0
+
--- /dev/null
+From 6a0d2451f18cc95174d4a4c7bab93bd6d08e46db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 14:58:55 +0800
+Subject: net/ncsi: Fix the multi thread manner of NCSI driver
+
+From: DelphineCCChiu <delphine_cc_chiu@wiwynn.com>
+
+[ Upstream commit e85e271dec0270982afed84f70dc37703fcc1d52 ]
+
+Currently NCSI driver will send several NCSI commands back to back without
+waiting for the response of the previous NCSI command, or a timeout, in some
+states when the NIC has multiple channels. This operation goes against the
+single-thread manner defined by the NCSI SPEC (section 6.3.2.3 in DSP0222_1.1.1).
+
+According to NCSI SPEC(section 6.2.13.1 in DSP0222_1.1.1), we should probe
+one channel at a time by sending NCSI commands (Clear initial state, Get
+version ID, Get capabilities...), then repeat these steps until the max
+number of channels which we got from NCSI command (Get capabilities) has
+been probed.
+
+Fixes: e6f44ed6d04d ("net/ncsi: Package and channel management")
+Signed-off-by: DelphineCCChiu <delphine_cc_chiu@wiwynn.com>
+Link: https://lore.kernel.org/r/20240529065856.825241-1-delphine_cc_chiu@wiwynn.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ncsi/internal.h | 2 ++
+ net/ncsi/ncsi-manage.c | 73 +++++++++++++++++++++---------------------
+ net/ncsi/ncsi-rsp.c | 4 ++-
+ 3 files changed, 41 insertions(+), 38 deletions(-)
+
+diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
+index 374412ed780b6..ef0f8f73826f5 100644
+--- a/net/ncsi/internal.h
++++ b/net/ncsi/internal.h
+@@ -325,6 +325,7 @@ struct ncsi_dev_priv {
+ spinlock_t lock; /* Protect the NCSI device */
+ unsigned int package_probe_id;/* Current ID during probe */
+ unsigned int package_num; /* Number of packages */
++ unsigned int channel_probe_id;/* Current channel ID during probe */
+ struct list_head packages; /* List of packages */
+ struct ncsi_channel *hot_channel; /* Channel was ever active */
+ struct ncsi_request requests[256]; /* Request table */
+@@ -343,6 +344,7 @@ struct ncsi_dev_priv {
+ bool multi_package; /* Enable multiple packages */
+ bool mlx_multi_host; /* Enable multi host Mellanox */
+ u32 package_whitelist; /* Packages to configure */
++ unsigned char channel_count; /* Num of channels to probe */
+ };
+
+ struct ncsi_cmd_arg {
+diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
+index 745c788f1d1df..5ecf611c88200 100644
+--- a/net/ncsi/ncsi-manage.c
++++ b/net/ncsi/ncsi-manage.c
+@@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
+
+ break;
+ case ncsi_dev_state_suspend_gls:
+- ndp->pending_req_num = np->channel_num;
++ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_GLS;
+ nca.package = np->id;
++ nca.channel = ndp->channel_probe_id;
++ ret = ncsi_xmit_cmd(&nca);
++ if (ret)
++ goto error;
++ ndp->channel_probe_id++;
+
+- nd->state = ncsi_dev_state_suspend_dcnt;
+- NCSI_FOR_EACH_CHANNEL(np, nc) {
+- nca.channel = nc->id;
+- ret = ncsi_xmit_cmd(&nca);
+- if (ret)
+- goto error;
++ if (ndp->channel_probe_id == ndp->channel_count) {
++ ndp->channel_probe_id = 0;
++ nd->state = ncsi_dev_state_suspend_dcnt;
+ }
+
+ break;
+@@ -1345,7 +1347,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
+ {
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_package *np;
+- struct ncsi_channel *nc;
+ struct ncsi_cmd_arg nca;
+ unsigned char index;
+ int ret;
+@@ -1423,23 +1424,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
+
+ nd->state = ncsi_dev_state_probe_cis;
+ break;
+- case ncsi_dev_state_probe_cis:
+- ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
+-
+- /* Clear initial state */
+- nca.type = NCSI_PKT_CMD_CIS;
+- nca.package = ndp->active_package->id;
+- for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
+- nca.channel = index;
+- ret = ncsi_xmit_cmd(&nca);
+- if (ret)
+- goto error;
+- }
+-
+- nd->state = ncsi_dev_state_probe_gvi;
+- if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
+- nd->state = ncsi_dev_state_probe_keep_phy;
+- break;
+ case ncsi_dev_state_probe_keep_phy:
+ ndp->pending_req_num = 1;
+
+@@ -1452,14 +1436,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
+
+ nd->state = ncsi_dev_state_probe_gvi;
+ break;
++ case ncsi_dev_state_probe_cis:
+ case ncsi_dev_state_probe_gvi:
+ case ncsi_dev_state_probe_gc:
+ case ncsi_dev_state_probe_gls:
+ np = ndp->active_package;
+- ndp->pending_req_num = np->channel_num;
++ ndp->pending_req_num = 1;
+
+- /* Retrieve version, capability or link status */
+- if (nd->state == ncsi_dev_state_probe_gvi)
++ /* Clear initial state, retrieve version, capability or link status */
++ if (nd->state == ncsi_dev_state_probe_cis)
++ nca.type = NCSI_PKT_CMD_CIS;
++ else if (nd->state == ncsi_dev_state_probe_gvi)
+ nca.type = NCSI_PKT_CMD_GVI;
+ else if (nd->state == ncsi_dev_state_probe_gc)
+ nca.type = NCSI_PKT_CMD_GC;
+@@ -1467,19 +1454,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
+ nca.type = NCSI_PKT_CMD_GLS;
+
+ nca.package = np->id;
+- NCSI_FOR_EACH_CHANNEL(np, nc) {
+- nca.channel = nc->id;
+- ret = ncsi_xmit_cmd(&nca);
+- if (ret)
+- goto error;
+- }
++ nca.channel = ndp->channel_probe_id;
+
+- if (nd->state == ncsi_dev_state_probe_gvi)
++ ret = ncsi_xmit_cmd(&nca);
++ if (ret)
++ goto error;
++
++ if (nd->state == ncsi_dev_state_probe_cis) {
++ nd->state = ncsi_dev_state_probe_gvi;
++ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0)
++ nd->state = ncsi_dev_state_probe_keep_phy;
++ } else if (nd->state == ncsi_dev_state_probe_gvi) {
+ nd->state = ncsi_dev_state_probe_gc;
+- else if (nd->state == ncsi_dev_state_probe_gc)
++ } else if (nd->state == ncsi_dev_state_probe_gc) {
+ nd->state = ncsi_dev_state_probe_gls;
+- else
++ } else {
++ nd->state = ncsi_dev_state_probe_cis;
++ ndp->channel_probe_id++;
++ }
++
++ if (ndp->channel_probe_id == ndp->channel_count) {
++ ndp->channel_probe_id = 0;
+ nd->state = ncsi_dev_state_probe_dp;
++ }
+ break;
+ case ncsi_dev_state_probe_dp:
+ ndp->pending_req_num = 1;
+@@ -1780,6 +1777,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
+ ndp->requests[i].ndp = ndp;
+ timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
+ }
++ ndp->channel_count = NCSI_RESERVED_CHANNEL;
+
+ spin_lock_irqsave(&ncsi_dev_lock, flags);
+ list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
+@@ -1813,6 +1811,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
+
+ if (!(ndp->flags & NCSI_DEV_PROBED)) {
+ ndp->package_probe_id = 0;
++ ndp->channel_probe_id = 0;
+ nd->state = ncsi_dev_state_probe;
+ schedule_work(&ndp->work);
+ return 0;
+diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
+index bee290d0f48b6..e28be33bdf2c4 100644
+--- a/net/ncsi/ncsi-rsp.c
++++ b/net/ncsi/ncsi-rsp.c
+@@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
+ struct ncsi_rsp_gc_pkt *rsp;
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct ncsi_channel *nc;
++ struct ncsi_package *np;
+ size_t size;
+
+ /* Find the channel */
+ rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
+ ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
+- NULL, &nc);
++ &np, &nc);
+ if (!nc)
+ return -ENODEV;
+
+@@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
+ */
+ nc->vlan_filter.bitmap = U64_MAX;
+ nc->vlan_filter.n_vids = rsp->vlan_cnt;
++ np->ndp->channel_count = rsp->channel_cnt;
+
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 1d3bd7f1d9c86df0dc8515c4c59831b95acf1661 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 19:20:23 -0700
+Subject: net: phy: micrel: fix KSZ9477 PHY issues after suspend/resume
+
+From: Tristram Ha <tristram.ha@microchip.com>
+
+[ Upstream commit 6149db4997f582e958da675092f21c666e3b67b7 ]
+
+When the PHY is powered up after powered down most of the registers are
+reset, so the PHY setup code needs to be done again. In addition the
+interrupt register will need to be setup again so that link status
+indication works again.
+
+Fixes: 26dd2974c5b5 ("net: phy: micrel: Move KSZ9477 errata fixes to PHY driver")
+Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 62 ++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 56 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 13370439a7cae..c2d99344ade41 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -1858,7 +1858,7 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
+ {0x1c, 0x20, 0xeeee},
+ };
+
+-static int ksz9477_config_init(struct phy_device *phydev)
++static int ksz9477_phy_errata(struct phy_device *phydev)
+ {
+ int err;
+ int i;
+@@ -1886,16 +1886,30 @@ static int ksz9477_config_init(struct phy_device *phydev)
+ return err;
+ }
+
++ err = genphy_restart_aneg(phydev);
++ if (err)
++ return err;
++
++ return err;
++}
++
++static int ksz9477_config_init(struct phy_device *phydev)
++{
++ int err;
++
++ /* Only KSZ9897 family of switches needs this fix. */
++ if ((phydev->phy_id & 0xf) == 1) {
++ err = ksz9477_phy_errata(phydev);
++ if (err)
++ return err;
++ }
++
+ /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
+ * in this switch shall be regarded as broken.
+ */
+ if (phydev->dev_flags & MICREL_NO_EEE)
+ phydev->eee_broken_modes = -1;
+
+- err = genphy_restart_aneg(phydev);
+- if (err)
+- return err;
+-
+ return kszphy_config_init(phydev);
+ }
+
+@@ -2004,6 +2018,42 @@ static int kszphy_resume(struct phy_device *phydev)
+ return 0;
+ }
+
++static int ksz9477_resume(struct phy_device *phydev)
++{
++ int ret;
++
++ /* No need to initialize registers if not powered down. */
++ ret = phy_read(phydev, MII_BMCR);
++ if (ret < 0)
++ return ret;
++ if (!(ret & BMCR_PDOWN))
++ return 0;
++
++ genphy_resume(phydev);
++
++ /* After switching from power-down to normal mode, an internal global
++ * reset is automatically generated. Wait a minimum of 1 ms before
++ * read/write access to the PHY registers.
++ */
++ usleep_range(1000, 2000);
++
++ /* Only KSZ9897 family of switches needs this fix. */
++ if ((phydev->phy_id & 0xf) == 1) {
++ ret = ksz9477_phy_errata(phydev);
++ if (ret)
++ return ret;
++ }
++
++ /* Enable PHY Interrupts */
++ if (phy_interrupt_is_valid(phydev)) {
++ phydev->interrupts = PHY_INTERRUPT_ENABLED;
++ if (phydev->drv->config_intr)
++ phydev->drv->config_intr(phydev);
++ }
++
++ return 0;
++}
++
+ static int kszphy_probe(struct phy_device *phydev)
+ {
+ const struct kszphy_type *type = phydev->drv->driver_data;
+@@ -4980,7 +5030,7 @@ static struct phy_driver ksphy_driver[] = {
+ .config_intr = kszphy_config_intr,
+ .handle_interrupt = kszphy_handle_interrupt,
+ .suspend = genphy_suspend,
+- .resume = genphy_resume,
++ .resume = ksz9477_resume,
+ .get_features = ksz9477_get_features,
+ } };
+
+--
+2.43.0
+
--- /dev/null
+From 6886fe0bf9e15f1576fe4ba31ee2f4c25252da11 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 May 2024 18:38:01 -0700
+Subject: net: phy: Micrel KSZ8061: fix errata solution not taking effect
+ problem
+
+From: Tristram Ha <tristram.ha@microchip.com>
+
+[ Upstream commit 0a8d3f2e3e8d8aea8af017e14227b91d5989b696 ]
+
+KSZ8061 needs to write to a MMD register at driver initialization to fix
+an errata. This worked in 5.0 kernel but not in newer kernels. The
+issue is the main phylib code no longer resets PHY at the very beginning.
+Calling phy resuming code later will reset the chip if it is already
+powered down at the beginning. This wipes out the MMD register write.
+Solution is to implement a phy resume function for KSZ8061 to take care
+of this problem.
+
+Fixes: 232ba3a51cc2 ("net: phy: Micrel KSZ8061: link failure after cable connect")
+Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 42 +++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 41 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index c2d99344ade41..4b22bb6393e26 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -785,6 +785,17 @@ static int ksz8061_config_init(struct phy_device *phydev)
+ {
+ int ret;
+
++ /* Chip can be powered down by the bootstrap code. */
++ ret = phy_read(phydev, MII_BMCR);
++ if (ret < 0)
++ return ret;
++ if (ret & BMCR_PDOWN) {
++ ret = phy_write(phydev, MII_BMCR, ret & ~BMCR_PDOWN);
++ if (ret < 0)
++ return ret;
++ usleep_range(1000, 2000);
++ }
++
+ ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A);
+ if (ret)
+ return ret;
+@@ -2054,6 +2065,35 @@ static int ksz9477_resume(struct phy_device *phydev)
+ return 0;
+ }
+
++static int ksz8061_resume(struct phy_device *phydev)
++{
++ int ret;
++
++ /* This function can be called twice when the Ethernet device is on. */
++ ret = phy_read(phydev, MII_BMCR);
++ if (ret < 0)
++ return ret;
++ if (!(ret & BMCR_PDOWN))
++ return 0;
++
++ genphy_resume(phydev);
++ usleep_range(1000, 2000);
++
++ /* Re-program the value after chip is reset. */
++ ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A);
++ if (ret)
++ return ret;
++
++ /* Enable PHY Interrupts */
++ if (phy_interrupt_is_valid(phydev)) {
++ phydev->interrupts = PHY_INTERRUPT_ENABLED;
++ if (phydev->drv->config_intr)
++ phydev->drv->config_intr(phydev);
++ }
++
++ return 0;
++}
++
+ static int kszphy_probe(struct phy_device *phydev)
+ {
+ const struct kszphy_type *type = phydev->drv->driver_data;
+@@ -4876,7 +4916,7 @@ static struct phy_driver ksphy_driver[] = {
+ .config_intr = kszphy_config_intr,
+ .handle_interrupt = kszphy_handle_interrupt,
+ .suspend = kszphy_suspend,
+- .resume = kszphy_resume,
++ .resume = ksz8061_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ9021,
+ .phy_id_mask = 0x000ffffe,
+--
+2.43.0
+
--- /dev/null
+From 9bbb21a87641abeccfd9fd07ed71239ca2a81b2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 15:13:03 +0800
+Subject: net: sched: sch_multiq: fix possible OOB write in multiq_tune()
+
+From: Hangyu Hua <hbh25y@gmail.com>
+
+[ Upstream commit affc18fdc694190ca7575b9a86632a73b9fe043d ]
+
+q->bands will be assigned to qopt->bands to execute subsequent code logic
+after kmalloc. So the old q->bands should not be used in kmalloc.
+Otherwise, an out-of-bounds write will occur.
+
+Fixes: c2999f7fb05b ("net: sched: multiq: don't call qdisc_put() while holding tree lock")
+Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
+Acked-by: Cong Wang <cong.wang@bytedance.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_multiq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
+index 79e93a19d5fab..06e03f5cd7ce1 100644
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
+
+ qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+- removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
++ removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands),
+ GFP_KERNEL);
+ if (!removed)
+ return -ENOMEM;
+--
+2.43.0
+
--- /dev/null
+From 41f8a75039c6529a48d306e97e3867317ebc8072 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 18:15:11 +0000
+Subject: net/sched: taprio: always validate TCA_TAPRIO_ATTR_PRIOMAP
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit f921a58ae20852d188f70842431ce6519c4fdc36 ]
+
+If one TCA_TAPRIO_ATTR_PRIOMAP attribute has been provided,
+taprio_parse_mqprio_opt() must validate it, or userspace
+can inject arbitrary data to the kernel, the second time
+taprio_change() is called.
+
+First call (with valid attributes) sets dev->num_tc
+to a non zero value.
+
+Second call (with arbitrary mqprio attributes)
+returns early from taprio_parse_mqprio_opt()
+and bad things can happen.
+
+Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule")
+Reported-by: Noam Rathaus <noamr@ssd-disclosure.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20240604181511.769870-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_taprio.c | 15 ++++++---------
+ 1 file changed, 6 insertions(+), 9 deletions(-)
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index 5c3f8a278fc2f..0b150b13bee7a 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1176,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
+ {
+ bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
+
+- if (!qopt && !dev->num_tc) {
+- NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+- return -EINVAL;
+- }
+-
+- /* If num_tc is already set, it means that the user already
+- * configured the mqprio part
+- */
+- if (dev->num_tc)
++ if (!qopt) {
++ if (!dev->num_tc) {
++ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
++ return -EINVAL;
++ }
+ return 0;
++ }
+
+ /* taprio imposes that traffic classes map 1:n to tx queues */
+ if (qopt->num_tc > dev->num_tx_queues) {
+--
+2.43.0
+
--- /dev/null
+From ed165b69aa8dfa7330d5c057dbf37fd6c8920d0c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 16:54:17 +0800
+Subject: net/smc: avoid overwriting when adjusting sock bufsizes
+
+From: Wen Gu <guwen@linux.alibaba.com>
+
+[ Upstream commit fb0aa0781a5f457e3864da68af52c3b1f4f7fd8f ]
+
+When copying smc settings to clcsock, avoid setting clcsock's sk_sndbuf
+to sysctl_tcp_wmem[1], since this may overwrite the value set by
+tcp_sndbuf_expand() in TCP connection establishment.
+
+And the other setting sk_{snd|rcv}buf to sysctl value in
+smc_adjust_sock_bufsizes() can also be omitted since the initialization
+of smc sock and clcsock has set sk_{snd|rcv}buf to smc.sysctl_{w|r}mem
+or ipv4_sysctl_tcp_{w|r}mem[1].
+
+Fixes: 30c3c4a4497c ("net/smc: Use correct buffer sizes when switching between TCP and SMC")
+Link: https://lore.kernel.org/r/5eaf3858-e7fd-4db8-83e8-3d7a3e0e9ae2@linux.alibaba.com
+Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
+Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
+Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>, too.
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/af_smc.c | 22 ++--------------------
+ 1 file changed, 2 insertions(+), 20 deletions(-)
+
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 4b52b3b159c0e..5f9f3d4c1df5f 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -460,29 +460,11 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
+ static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+ {
+- struct net *nnet = sock_net(nsk);
+-
+ nsk->sk_userlocks = osk->sk_userlocks;
+- if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
++ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+- } else {
+- if (mask == SK_FLAGS_SMC_TO_CLC)
+- WRITE_ONCE(nsk->sk_sndbuf,
+- READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+- else
+- WRITE_ONCE(nsk->sk_sndbuf,
+- 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+- }
+- if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
++ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+- } else {
+- if (mask == SK_FLAGS_SMC_TO_CLC)
+- WRITE_ONCE(nsk->sk_rcvbuf,
+- READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+- else
+- WRITE_ONCE(nsk->sk_rcvbuf,
+- 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+- }
+ }
+
+ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+--
+2.43.0
+
--- /dev/null
+From cea48164b1cbef78c15fc1afd0bff2cd8184a496 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 May 2024 16:26:07 -0700
+Subject: net: tls: fix marking packets as decrypted
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit a535d59432370343058755100ee75ab03c0e3f91 ]
+
+For TLS offload we mark packets with skb->decrypted to make sure
+they don't escape the host without getting encrypted first.
+The crypto state lives in the socket, so it may get detached
+by a call to skb_orphan(). As a safety check - the egress path
+drops all packets with skb->decrypted and no "crypto-safe" socket.
+
+The skb marking was added to sendpage only (and not sendmsg),
+because tls_device injected data into the TCP stack using sendpage.
+This special case was missed when sendpage got folded into sendmsg.
+
+Fixes: c5c37af6ecad ("tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240530232607.82686-1-kuba@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 66d77faca64f6..5c79836e4c9e7 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1159,6 +1159,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
+
+ process_backlog++;
+
++#ifdef CONFIG_SKB_DECRYPTED
++ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
++#endif
+ tcp_skb_entail(sk, skb);
+ copy = size_goal;
+
+--
+2.43.0
+
--- /dev/null
+From db34d1a20aa10db3e7b0a583f65fa28e353a30f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 11:25:00 +0300
+Subject: net: wwan: iosm: Fix tainted pointer delete in case of region
+ creation fail
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit b0c9a26435413b81799047a7be53255640432547 ]
+
+In case of region creation fail in ipc_devlink_create_region(), previously
+created regions delete process starts from tainted pointer which actually
+holds error code value.
+Fix this bug by decreasing region index before delete.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 4dcd183fbd67 ("net: wwan: iosm: devlink registration")
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Acked-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240604082500.20769-1-amishin@t-argos.ru
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wwan/iosm/iosm_ipc_devlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c
+index 2fe724d623c06..33c5a46f1b922 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c
+@@ -210,7 +210,7 @@ static int ipc_devlink_create_region(struct iosm_devlink *devlink)
+ rc = PTR_ERR(devlink->cd_regions[i]);
+ dev_err(devlink->dev, "Devlink region fail,err %d", rc);
+ /* Delete previously created regions */
+- for ( ; i >= 0; i--)
++ for (i--; i >= 0; i--)
+ devlink_region_destroy(devlink->cd_regions[i]);
+ goto region_create_fail;
+ }
+--
+2.43.0
+
--- /dev/null
+From 142c9d9b882aeacb90240567a78a709f8c9ed10d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 20:59:44 +0530
+Subject: octeontx2-af: Always allocate PF entries from low prioriy zone
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit 8b0f7410942cdc420c4557eda02bfcdf60ccec17 ]
+
+PF mcam entries has to be at low priority always so that VF
+can install longest prefix match rules at higher priority.
+This was taken care currently but when priority allocation
+wrt reference entry is requested then entries are allocated
+from mid-zone instead of low priority zone. Fix this and
+always allocate entries from low priority zone for PFs.
+
+Fixes: 7df5b4b260dd ("octeontx2-af: Allocate low priority entries for PF")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 33 ++++++++++++-------
+ 1 file changed, 22 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index e8b73b9d75e31..97722ce8c4cb3 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -2519,7 +2519,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
+ * - when available free entries are less.
+ * Lower priority ones out of avaialble free entries are always
+ * chosen when 'high vs low' question arises.
++ *
++ * For a VF base MCAM match rule is set by its PF. And all the
++ * further MCAM rules installed by VF on its own are
++ * concatenated with the base rule set by its PF. Hence PF entries
++ * should be at lower priority compared to VF entries. Otherwise
++ * base rule is hit always and rules installed by VF will be of
++ * no use. Hence if the request is from PF then allocate low
++ * priority entries.
+ */
++ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
++ goto lprio_alloc;
+
+ /* Get the search range for priority allocation request */
+ if (req->priority) {
+@@ -2528,17 +2538,6 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
+ goto alloc;
+ }
+
+- /* For a VF base MCAM match rule is set by its PF. And all the
+- * further MCAM rules installed by VF on its own are
+- * concatenated with the base rule set by its PF. Hence PF entries
+- * should be at lower priority compared to VF entries. Otherwise
+- * base rule is hit always and rules installed by VF will be of
+- * no use. Hence if the request is from PF and NOT a priority
+- * allocation request then allocate low priority entries.
+- */
+- if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+- goto lprio_alloc;
+-
+ /* Find out the search range for non-priority allocation request
+ *
+ * Get MCAM free entry count in middle zone.
+@@ -2568,6 +2567,18 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
+ reverse = true;
+ start = 0;
+ end = mcam->bmap_entries;
++ /* Ensure PF requests are always at bottom and if PF requests
++ * for higher/lower priority entry wrt reference entry then
++ * honour that criteria and start search for entries from bottom
++ * and not in mid zone.
++ */
++ if (!(pcifunc & RVU_PFVF_FUNC_MASK) &&
++ req->priority == NPC_MCAM_HIGHER_PRIO)
++ end = req->ref_entry;
++
++ if (!(pcifunc & RVU_PFVF_FUNC_MASK) &&
++ req->priority == NPC_MCAM_LOWER_PRIO)
++ start = req->ref_entry;
+ }
+
+ alloc:
+--
+2.43.0
+
--- /dev/null
+From caa070bd30374a1c52b0b50f002f479f2ddd99df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 14:05:27 +0200
+Subject: ptp: Fix error message on failed pin verification
+
+From: Karol Kolacinski <karol.kolacinski@intel.com>
+
+[ Upstream commit 323a359f9b077f382f4483023d096a4d316fd135 ]
+
+On failed verification of PTP clock pin, error message prints channel
+number instead of pin index after "pin", which is incorrect.
+
+Fix error message by adding channel number to the message and printing
+pin number instead of channel number.
+
+Fixes: 6092315dfdec ("ptp: introduce programmable pins.")
+Signed-off-by: Karol Kolacinski <karol.kolacinski@intel.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Link: https://lore.kernel.org/r/20240604120555.16643-1-karol.kolacinski@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ptp/ptp_chardev.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
+index 7513018c9f9ac..2067b0120d083 100644
+--- a/drivers/ptp/ptp_chardev.c
++++ b/drivers/ptp/ptp_chardev.c
+@@ -85,7 +85,8 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
+ }
+
+ if (info->verify(info, pin, func, chan)) {
+- pr_err("driver cannot use function %u on pin %u\n", func, chan);
++ pr_err("driver cannot use function %u and channel %u on pin %u\n",
++ func, chan, pin);
+ return -EOPNOTSUPP;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 11410aaeffeb9a14adbe5d76ebf104f45583983d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 10:13:34 +0800
+Subject: RISC-V: KVM: Fix incorrect reg_subtype labels in
+ kvm_riscv_vcpu_set_reg_isa_ext function
+
+From: Quan Zhou <zhouquan@iscas.ac.cn>
+
+[ Upstream commit c66f3b40b17d3dfc4b6abb5efde8e71c46971821 ]
+
+In the function kvm_riscv_vcpu_set_reg_isa_ext, the original code
+used incorrect reg_subtype labels KVM_REG_RISCV_SBI_MULTI_EN/DIS.
+These have been corrected to KVM_REG_RISCV_ISA_MULTI_EN/DIS respectively.
+Although they are numerically equivalent and the actual processing
+does not result in errors, using the wrong labels leads to ambiguous code semantics.
+
+Fixes: 613029442a4b ("RISC-V: KVM: Extend ONE_REG to enable/disable multiple ISA extensions")
+Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Link: https://lore.kernel.org/r/ff1c6771a67d660db94372ac9aaa40f51e5e0090.1716429371.git.zhouquan@iscas.ac.cn
+Signed-off-by: Anup Patel <anup@brainfault.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kvm/vcpu_onereg.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
+index 994adc26db4b1..e5706f5f2c71a 100644
+--- a/arch/riscv/kvm/vcpu_onereg.c
++++ b/arch/riscv/kvm/vcpu_onereg.c
+@@ -718,9 +718,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_SINGLE:
+ return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
+- case KVM_REG_RISCV_SBI_MULTI_EN:
++ case KVM_REG_RISCV_ISA_MULTI_EN:
+ return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
+- case KVM_REG_RISCV_SBI_MULTI_DIS:
++ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
+ default:
+ return -ENOENT;
+--
+2.43.0
+
--- /dev/null
+From 979baf7ee069a48da9632be09fc16de736543357 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Apr 2024 14:49:04 +0800
+Subject: RISC-V: KVM: No need to use mask when hart-index-bit is 0
+
+From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
+
+[ Upstream commit 2d707b4e37f9b0c37b8b2392f91b04c5b63ea538 ]
+
+When the maximum hart number within groups is 1, hart-index-bit is set to
+0. Consequently, there is no need to restore the hart ID from IMSIC
+addresses and hart-index-bit settings. Currently, QEMU and kvmtool do not
+pass correct hart-index-bit values when the maximum hart number is a
+power of 2, thereby avoiding this issue. Corresponding patches for QEMU
+and kvmtool will also be dispatched.
+
+Fixes: 89d01306e34d ("RISC-V: KVM: Implement device interface for AIA irqchip")
+Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Link: https://lore.kernel.org/r/20240415064905.25184-1-yongxuan.wang@sifive.com
+Signed-off-by: Anup Patel <anup@brainfault.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kvm/aia_device.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
+index 0eb689351b7d0..5cd407c6a8e4f 100644
+--- a/arch/riscv/kvm/aia_device.c
++++ b/arch/riscv/kvm/aia_device.c
+@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
+
+ static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
+ {
+- u32 hart, group = 0;
++ u32 hart = 0, group = 0;
+
+- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+- GENMASK_ULL(aia->nr_hart_bits - 1, 0);
++ if (aia->nr_hart_bits)
++ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
++ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_group_bits)
+ group = (addr >> aia->nr_group_shift) &
+ GENMASK_ULL(aia->nr_group_bits - 1, 0);
+--
+2.43.0
+
--- /dev/null
+From 1fab2f53815457f864367c7ff3613d96a4aa8772 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2024 11:48:26 -0700
+Subject: rtnetlink: make the "split" NLM_DONE handling generic
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5b4b62a169e10401cca34a6e7ac39161986f5605 ]
+
+Jaroslav reports Dell's OMSA Systems Management Data Engine
+expects NLM_DONE in a separate recvmsg(), both for rtnl_dump_ifinfo()
+and inet_dump_ifaddr(). We already added a similar fix previously in
+commit 460b0d33cf10 ("inet: bring NLM_DONE out to a separate recv() again")
+
+Instead of modifying all the dump handlers, and making them look
+different than modern for_each_netdev_dump()-based dump handlers -
+put the workaround in rtnetlink code. This will also help us move
+the custom rtnl-locking from af_netlink in the future (in net-next).
+
+Note that this change is not touching rtnl_dump_all(). rtnl_dump_all()
+is different kettle of fish and a potential problem. We now mix families
+in a single recvmsg(), but NLM_DONE is not coalesced.
+
+Tested:
+
+ ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_addr.yaml \
+ --dump getaddr --json '{"ifa-family": 2}'
+
+ ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_route.yaml \
+ --dump getroute --json '{"rtm-family": 2}'
+
+ ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_link.yaml \
+ --dump getlink
+
+Fixes: 3e41af90767d ("rtnetlink: use xarray iterator to implement rtnl_dump_ifinfo()")
+Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
+Reported-by: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>
+Link: https://lore.kernel.org/all/CAK8fFZ7MKoFSEzMBDAOjoUt+vTZRRQgLDNXEOfdCCXSoXXKE0g@mail.gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/rtnetlink.h | 1 +
+ net/core/rtnetlink.c | 44 +++++++++++++++++++++++++++++++++++++++--
+ net/ipv4/devinet.c | 2 +-
+ net/ipv4/fib_frontend.c | 7 +------
+ 4 files changed, 45 insertions(+), 9 deletions(-)
+
+diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
+index 3bfb80bad1739..b45d57b5968af 100644
+--- a/include/net/rtnetlink.h
++++ b/include/net/rtnetlink.h
+@@ -13,6 +13,7 @@ enum rtnl_link_flags {
+ RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
+ RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
+ RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
++ RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */
+ };
+
+ enum rtnl_kinds {
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 8ba6a4e4be266..74e6f9746fb30 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -6484,6 +6484,46 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+
+ /* Process one rtnetlink message. */
+
++static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
++{
++ rtnl_dumpit_func dumpit = cb->data;
++ int err;
++
++ /* Previous iteration have already finished, avoid calling->dumpit()
++ * again, it may not expect to be called after it reached the end.
++ */
++ if (!dumpit)
++ return 0;
++
++ err = dumpit(skb, cb);
++
++ /* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
++ * Some applications which parse netlink manually depend on this.
++ */
++ if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
++ if (err < 0 && err != -EMSGSIZE)
++ return err;
++ if (!err)
++ cb->data = NULL;
++
++ return skb->len;
++ }
++ return err;
++}
++
++static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
++ const struct nlmsghdr *nlh,
++ struct netlink_dump_control *control)
++{
++ if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
++ WARN_ON(control->data);
++ control->data = control->dump;
++ control->dump = rtnl_dumpit;
++ }
++
++ return netlink_dump_start(ssk, skb, nlh, control);
++}
++
+ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+ {
+@@ -6548,7 +6588,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ .module = owner,
+ .flags = flags,
+ };
+- err = netlink_dump_start(rtnl, skb, nlh, &c);
++ err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
+ /* netlink_dump_start() will keep a reference on
+ * module if dump is still in progress.
+ */
+@@ -6694,7 +6734,7 @@ void __init rtnetlink_init(void)
+ register_netdevice_notifier(&rtnetlink_dev_notifier);
+
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+- rtnl_dump_ifinfo, 0);
++ rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index 8382cc998bff8..84b5d1ccf716a 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -2801,7 +2801,7 @@ void __init devinet_init(void)
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
+- RTNL_FLAG_DUMP_UNLOCKED);
++ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
+ inet_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index c484b1c0fc00a..7ad2cafb92763 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1050,11 +1050,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ e++;
+ }
+ }
+-
+- /* Don't let NLM_DONE coalesce into a message, even if it could.
+- * Some user space expects NLM_DONE in a separate recv().
+- */
+- err = skb->len;
+ out:
+
+ cb->args[1] = e;
+@@ -1665,5 +1660,5 @@ void __init ip_fib_init(void)
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
+- RTNL_FLAG_DUMP_UNLOCKED);
++ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ }
+--
+2.43.0
+
--- /dev/null
+From a45683672a7463fd6c996377ebe6ab02bc9da760 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 10:59:04 +0900
+Subject: scsi: ufs: mcq: Fix error output and clean up ufshcd_mcq_abort()
+
+From: Chanwoo Lee <cw9316.lee@samsung.com>
+
+[ Upstream commit d53b681ce9ca7db5ef4ecb8d2cf465ae4a031264 ]
+
+An error unrelated to ufshcd_try_to_abort_task is being logged and can
+cause confusion. Modify ufshcd_mcq_abort() to print the result of the abort
+failure. For readability, return immediately instead of 'goto'.
+
+Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()")
+Signed-off-by: Chanwoo Lee <cw9316.lee@samsung.com>
+Link: https://lore.kernel.org/r/20240524015904.1116005-1-cw9316.lee@samsung.com
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufs-mcq.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
+index 005d63ab1f441..8944548c30fa1 100644
+--- a/drivers/ufs/core/ufs-mcq.c
++++ b/drivers/ufs/core/ufs-mcq.c
+@@ -634,20 +634,20 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+ struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+ struct ufs_hw_queue *hwq;
+ unsigned long flags;
+- int err = FAILED;
++ int err;
+
+ if (!ufshcd_cmd_inflight(lrbp->cmd)) {
+ dev_err(hba->dev,
+ "%s: skip abort. cmd at tag %d already completed.\n",
+ __func__, tag);
+- goto out;
++ return FAILED;
+ }
+
+ /* Skip task abort in case previous aborts failed and report failure */
+ if (lrbp->req_abort_skip) {
+ dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
+ __func__, tag);
+- goto out;
++ return FAILED;
+ }
+
+ hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+@@ -659,7 +659,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+ */
+ dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
+ __func__, hwq->id, tag);
+- goto out;
++ return FAILED;
+ }
+
+ /*
+@@ -667,18 +667,17 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+ * in the completion queue either. Query the device to see if
+ * the command is being processed in the device.
+ */
+- if (ufshcd_try_to_abort_task(hba, tag)) {
++ err = ufshcd_try_to_abort_task(hba, tag);
++ if (err) {
+ dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
+ lrbp->req_abort_skip = true;
+- goto out;
++ return FAILED;
+ }
+
+- err = SUCCESS;
+ spin_lock_irqsave(&hwq->cq_lock, flags);
+ if (ufshcd_cmd_inflight(lrbp->cmd))
+ ufshcd_release_scsi_cmd(hba, lrbp);
+ spin_unlock_irqrestore(&hwq->cq_lock, flags);
+
+-out:
+- return err;
++ return SUCCESS;
+ }
+--
+2.43.0
+
--- /dev/null
+From 5aa34917cfb84745f9469b7f2989fbb6563b4a10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 13:13:58 +0530
+Subject: selftests/mm: compaction_test: fix bogus test success and reduce
+ probability of OOM-killer invocation
+
+From: Dev Jain <dev.jain@arm.com>
+
+[ Upstream commit fb9293b6b0156fbf6ab97a1625d99a29c36d9f0c ]
+
+Reset nr_hugepages to zero before the start of the test.
+
+If a non-zero number of hugepages is already set before the start of the
+test, the following problems arise:
+
+ - The probability of the test getting OOM-killed increases. Proof:
+ The test wants to run on 80% of available memory to prevent OOM-killing
+ (see original code comments). Let the value of mem_free at the start
+ of the test, when nr_hugepages = 0, be x. In the other case, when
+ nr_hugepages > 0, let the memory consumed by hugepages be y. In the
+ former case, the test operates on 0.8 * x of memory. In the latter,
+ the test operates on 0.8 * (x - y) of memory, with y already filled,
+ hence, memory consumed is y + 0.8 * (x - y) = 0.8 * x + 0.2 * y > 0.8 *
+ x. Q.E.D
+
+ - The probability of a bogus test success increases. Proof: Let the
+ memory consumed by hugepages be greater than 25% of x, with x and y
+ defined as above. The definition of compaction_index is c_index = (x -
+ y)/z where z is the memory consumed by hugepages after trying to
+ increase them again. In check_compaction(), we set the number of
+ hugepages to zero, and then increase them back; the probability that
+ they will be set back to consume at least y amount of memory again is
+ very high (since there is not much delay between the two attempts of
+ changing nr_hugepages). Hence, z >= y > (x/4) (by the 25% assumption).
+ Therefore, c_index = (x - y)/z <= (x - y)/y = x/y - 1 < 4 - 1 = 3
+ hence, c_index can always be forced to be less than 3, thereby the test
+ succeeding always. Q.E.D
+
+Link: https://lkml.kernel.org/r/20240521074358.675031-4-dev.jain@arm.com
+Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory")
+Signed-off-by: Dev Jain <dev.jain@arm.com>
+Cc: <stable@vger.kernel.org>
+Cc: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Sri Jayaramappa <sjayaram@akamai.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/mm/compaction_test.c | 71 ++++++++++++++------
+ 1 file changed, 49 insertions(+), 22 deletions(-)
+
+diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
+index 5e9bd1da9370c..e140558e6f53f 100644
+--- a/tools/testing/selftests/mm/compaction_test.c
++++ b/tools/testing/selftests/mm/compaction_test.c
+@@ -82,13 +82,16 @@ int prereq(void)
+ return -1;
+ }
+
+-int check_compaction(unsigned long mem_free, unsigned long hugepage_size)
++int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
++ unsigned long initial_nr_hugepages)
+ {
+ unsigned long nr_hugepages_ul;
+ int fd, ret = -1;
+ int compaction_index = 0;
+- char initial_nr_hugepages[20] = {0};
+ char nr_hugepages[20] = {0};
++ char init_nr_hugepages[20] = {0};
++
++ sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages);
+
+ /* We want to test with 80% of available memory. Else, OOM killer comes
+ in to play */
+@@ -102,23 +105,6 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size)
+ goto out;
+ }
+
+- if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+- ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+- strerror(errno));
+- goto close_fd;
+- }
+-
+- lseek(fd, 0, SEEK_SET);
+-
+- /* Start with the initial condition of 0 huge pages*/
+- if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+- ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+- strerror(errno));
+- goto close_fd;
+- }
+-
+- lseek(fd, 0, SEEK_SET);
+-
+ /* Request a large number of huge pages. The Kernel will allocate
+ as much as it can */
+ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+@@ -146,8 +132,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size)
+
+ lseek(fd, 0, SEEK_SET);
+
+- if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
+- != strlen(initial_nr_hugepages)) {
++ if (write(fd, init_nr_hugepages, strlen(init_nr_hugepages))
++ != strlen(init_nr_hugepages)) {
+ ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+@@ -171,6 +157,41 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size)
+ return ret;
+ }
+
++int set_zero_hugepages(unsigned long *initial_nr_hugepages)
++{
++ int fd, ret = -1;
++ char nr_hugepages[20] = {0};
++
++ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
++ if (fd < 0) {
++ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
++ strerror(errno));
++ goto out;
++ }
++ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
++ ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
++ strerror(errno));
++ goto close_fd;
++ }
++
++ lseek(fd, 0, SEEK_SET);
++
++ /* Start with the initial condition of 0 huge pages */
++ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
++ ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
++ strerror(errno));
++ goto close_fd;
++ }
++
++ *initial_nr_hugepages = strtoul(nr_hugepages, NULL, 10);
++ ret = 0;
++
++ close_fd:
++ close(fd);
++
++ out:
++ return ret;
++}
+
+ int main(int argc, char **argv)
+ {
+@@ -181,6 +202,7 @@ int main(int argc, char **argv)
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ long mem_fragmentable_MB = 0;
++ unsigned long initial_nr_hugepages;
+
+ ksft_print_header();
+
+@@ -189,6 +211,10 @@ int main(int argc, char **argv)
+
+ ksft_set_plan(1);
+
++ /* Start the test without hugepages reducing mem_free */
++ if (set_zero_hugepages(&initial_nr_hugepages))
++ ksft_exit_fail();
++
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim))
+@@ -232,7 +258,8 @@ int main(int argc, char **argv)
+ entry = entry->next;
+ }
+
+- if (check_compaction(mem_free, hugepage_size) == 0)
++ if (check_compaction(mem_free, hugepage_size,
++ initial_nr_hugepages) == 0)
+ ksft_exit_pass();
+
+ ksft_exit_fail();
+--
+2.43.0
+
--- /dev/null
+From 8916f5cf5708512b1471e4fd0d768ca2d8b68445 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Apr 2024 10:24:07 -0700
+Subject: selftests/mm: ksft_exit functions do not return
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+[ Upstream commit 69e545edbe8b17c26aa06ef7e430d0be7f08d876 ]
+
+After commit f7d5bcd35d42 ("selftests: kselftest: Mark functions that
+unconditionally call exit() as __noreturn"), ksft_exit_...() functions
+are marked as __noreturn, which means the return type should not be
+'int' but 'void' because they are not returning anything (and never were
+since exit() has always been called).
+
+To facilitate updating the return type of these functions, remove
+'return' before the calls to ksft_exit_...(), as __noreturn prevents the
+compiler from warning that a caller of the ksft_exit functions does not
+return a value because the program will terminate upon calling these
+functions.
+
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
+Stable-dep-of: fb9293b6b015 ("selftests/mm: compaction_test: fix bogus test success and reduce probability of OOM-killer invocation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/mm/compaction_test.c | 6 +++---
+ tools/testing/selftests/mm/cow.c | 2 +-
+ tools/testing/selftests/mm/gup_longterm.c | 2 +-
+ tools/testing/selftests/mm/gup_test.c | 4 ++--
+ tools/testing/selftests/mm/ksm_functional_tests.c | 2 +-
+ tools/testing/selftests/mm/madv_populate.c | 2 +-
+ tools/testing/selftests/mm/mkdirty.c | 2 +-
+ tools/testing/selftests/mm/pagemap_ioctl.c | 4 ++--
+ tools/testing/selftests/mm/soft-dirty.c | 2 +-
+ 9 files changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
+index 326a893647bab..5e9bd1da9370c 100644
+--- a/tools/testing/selftests/mm/compaction_test.c
++++ b/tools/testing/selftests/mm/compaction_test.c
+@@ -185,7 +185,7 @@ int main(int argc, char **argv)
+ ksft_print_header();
+
+ if (prereq() || geteuid())
+- return ksft_exit_skip("Prerequisites unsatisfied\n");
++ ksft_exit_skip("Prerequisites unsatisfied\n");
+
+ ksft_set_plan(1);
+
+@@ -233,7 +233,7 @@ int main(int argc, char **argv)
+ }
+
+ if (check_compaction(mem_free, hugepage_size) == 0)
+- return ksft_exit_pass();
++ ksft_exit_pass();
+
+- return ksft_exit_fail();
++ ksft_exit_fail();
+ }
+diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
+index 363bf5f801be5..fe078d6e18064 100644
+--- a/tools/testing/selftests/mm/cow.c
++++ b/tools/testing/selftests/mm/cow.c
+@@ -1779,5 +1779,5 @@ int main(int argc, char **argv)
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
+index ad168d35b23b7..d7eaca5bbe9b1 100644
+--- a/tools/testing/selftests/mm/gup_longterm.c
++++ b/tools/testing/selftests/mm/gup_longterm.c
+@@ -456,5 +456,5 @@ int main(int argc, char **argv)
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
+index 7821cf45c323b..bdeaac67ff9aa 100644
+--- a/tools/testing/selftests/mm/gup_test.c
++++ b/tools/testing/selftests/mm/gup_test.c
+@@ -229,7 +229,7 @@ int main(int argc, char **argv)
+ break;
+ }
+ ksft_test_result_skip("Please run this test as root\n");
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
+@@ -268,5 +268,5 @@ int main(int argc, char **argv)
+
+ free(tid);
+
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
+index d615767e396be..508287560c455 100644
+--- a/tools/testing/selftests/mm/ksm_functional_tests.c
++++ b/tools/testing/selftests/mm/ksm_functional_tests.c
+@@ -646,5 +646,5 @@ int main(int argc, char **argv)
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
+index 17bcb07f19f34..ef7d911da13e0 100644
+--- a/tools/testing/selftests/mm/madv_populate.c
++++ b/tools/testing/selftests/mm/madv_populate.c
+@@ -307,5 +307,5 @@ int main(int argc, char **argv)
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
+index 301abb99e027e..b8a7efe9204ea 100644
+--- a/tools/testing/selftests/mm/mkdirty.c
++++ b/tools/testing/selftests/mm/mkdirty.c
+@@ -375,5 +375,5 @@ int main(void)
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
+index d59517ed3d48b..2d785aca72a5c 100644
+--- a/tools/testing/selftests/mm/pagemap_ioctl.c
++++ b/tools/testing/selftests/mm/pagemap_ioctl.c
+@@ -1484,7 +1484,7 @@ int main(int argc, char *argv[])
+ ksft_print_header();
+
+ if (init_uffd())
+- return ksft_exit_pass();
++ ksft_exit_pass();
+
+ ksft_set_plan(115);
+
+@@ -1660,5 +1660,5 @@ int main(int argc, char *argv[])
+ userfaultfd_tests();
+
+ close(pagemap_fd);
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
+index 7dbfa53d93a05..d9dbf879748b2 100644
+--- a/tools/testing/selftests/mm/soft-dirty.c
++++ b/tools/testing/selftests/mm/soft-dirty.c
+@@ -209,5 +209,5 @@ int main(int argc, char **argv)
+
+ close(pagemap_fd);
+
+- return ksft_exit_pass();
++ ksft_exit_pass();
+ }
+--
+2.43.0
+
nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
nilfs2-fix-nilfs_empty_dir-misjudgment-and-long-loop-on-i-o-errors.patch
smb-client-fix-deadlock-in-smb2_find_smb_tcon.patch
+wifi-ath11k-fix-wcn6750-firmware-crash-caused-by-17-.patch
+wifi-ath11k-move-power-type-check-to-assoc-stage-whe.patch
+cpufreq-amd-pstate-unify-computation-of-max-min-nomi.patch
+cpufreq-amd-pstate-add-quirk-for-the-pstate-cppc-cap.patch
+cpufreq-amd-pstate-remove-global-header-file.patch
+wifi-mac80211-mesh-fix-leak-of-mesh_preq_queue-objec.patch
+wifi-mac80211-fix-deadlock-in-ieee80211_sta_ps_deliv.patch
+wifi-cfg80211-fully-move-wiphy-work-to-unbound-workq.patch
+wifi-cfg80211-lock-wiphy-in-cfg80211_get_station.patch
+wifi-cfg80211-pmsr-use-correct-nla_get_ux-functions.patch
+wifi-mac80211-pass-proper-link-id-for-channel-switch.patch
+wifi-iwlwifi-mvm-don-t-initialize-csa_work-twice.patch
+wifi-iwlwifi-mvm-revert-gen2-tx-a-mpdu-size-to-64.patch
+wifi-iwlwifi-mvm-set-properly-mac-header.patch
+wifi-iwlwifi-dbg_ini-move-iwl_dbg_tlv_free-outside-o.patch
+wifi-iwlwifi-mvm-check-n_ssids-before-accessing-the-.patch
+wifi-iwlwifi-mvm-don-t-read-past-the-mfuart-notifcat.patch
+wifi-mac80211-fix-spatial-reuse-element-size-check.patch
+wifi-mac80211-correctly-parse-spatial-reuse-paramete.patch
+scsi-ufs-mcq-fix-error-output-and-clean-up-ufshcd_mc.patch
+risc-v-kvm-no-need-to-use-mask-when-hart-index-bit-i.patch
+risc-v-kvm-fix-incorrect-reg_subtype-labels-in-kvm_r.patch
+virtio_net-fix-possible-dim-status-unrecoverable.patch
+ax25-fix-refcount-imbalance-on-inbound-connections.patch
+ax25-replace-kfree-in-ax25_dev_free-with-ax25_dev_pu.patch
+net-ncsi-fix-the-multi-thread-manner-of-ncsi-driver.patch
+net-phy-micrel-fix-ksz9477-phy-issues-after-suspend-.patch
+bpf-fix-a-potential-use-after-free-in-bpf_link_free.patch
+kvm-sev-es-disallow-sev-es-guests-when-x86_feature_l.patch
+kvm-sev-es-delegate-lbr-virtualization-to-the-proces.patch
+vmxnet3-disable-rx-data-ring-on-dma-allocation-failu.patch
+ipv6-ioam-block-bh-from-ioam6_output.patch
+ipv6-sr-block-bh-in-seg6_output_core-and-seg6_input_.patch
+net-tls-fix-marking-packets-as-decrypted.patch
+bpf-set-run-context-for-rawtp-test_run-callback.patch
+octeontx2-af-always-allocate-pf-entries-from-low-pri.patch
+net-smc-avoid-overwriting-when-adjusting-sock-bufsiz.patch
+net-phy-micrel-ksz8061-fix-errata-solution-not-takin.patch
+ionic-fix-kernel-panic-in-xdp_tx-action.patch
+net-sched-sch_multiq-fix-possible-oob-write-in-multi.patch
+tcp-count-close-wait-sockets-for-tcp_mib_currestab.patch
+mptcp-count-close-wait-sockets-for-mptcp_mib_currest.patch
+rtnetlink-make-the-split-nlm_done-handling-generic.patch
+net-ethernet-mtk_eth_soc-handle-dma-buffer-size-soc-.patch
+net-mlx5-stop-waiting-for-pci-if-pci-channel-is-offl.patch
+net-mlx5-always-stop-health-timer-during-driver-remo.patch
+net-mlx5-fix-tainted-pointer-delete-is-case-of-flow-.patch
+net-sched-taprio-always-validate-tca_taprio_attr_pri.patch
+ptp-fix-error-message-on-failed-pin-verification.patch
+ice-fix-iteration-of-tlvs-in-preserved-fields-area.patch
+ice-fix-reads-from-nvm-shadow-ram-on-e830-and-e825-c.patch
+ice-remove-af_xdp_zc_qps-bitmap.patch
+ice-add-flag-to-distinguish-reset-from-.ndo_bpf-in-x.patch
+ice-map-xdp-queues-to-vectors-in-ice_vsi_map_rings_t.patch
+igc-fix-energy-efficient-ethernet-support-declaratio.patch
+net-wwan-iosm-fix-tainted-pointer-delete-is-case-of-.patch
+af_unix-set-sk-sk_state-under-unix_state_lock-for-tr.patch
+af_unix-annodate-data-races-around-sk-sk_state-for-w.patch
+af_unix-annotate-data-race-of-sk-sk_state-in-unix_in.patch
+af_unix-annotate-data-races-around-sk-sk_state-in-un.patch
+af_unix-annotate-data-race-of-sk-sk_state-in-unix_st.patch
+af_unix-annotate-data-races-around-sk-sk_state-in-se.patch
+af_unix-annotate-data-race-of-sk-sk_state-in-unix_st.patch-19410
+af_unix-annotate-data-races-around-sk-sk_state-in-un.patch-17973
+af_unix-annotate-data-races-around-sk-sk_sndbuf.patch
+af_unix-annotate-data-race-of-net-unx.sysctl_max_dgr.patch
+af_unix-use-unix_recvq_full_lockless-in-unix_stream_.patch
+af_unix-use-skb_queue_empty_lockless-in-unix_release.patch
+af_unix-use-skb_queue_len_lockless-in-sk_diag_show_r.patch
+af_unix-annotate-data-race-of-sk-sk_shutdown-in-sk_d.patch
+ipv6-fix-possible-race-in-__fib6_drop_pcpu_from.patch
+net-ethtool-fix-the-error-condition-in-ethtool_get_p.patch
+drm-xe-use-ordered-wq-for-g2h-handler.patch
+x86-cpu-get-rid-of-an-unnecessary-local-variable-in-.patch
+x86-cpu-provide-default-cache-line-size-if-not-enume.patch
+selftests-mm-ksft_exit-functions-do-not-return.patch
+selftests-mm-compaction_test-fix-bogus-test-success-.patch
+ext4-avoid-overflow-when-setting-values-via-sysfs.patch
+ext4-refactor-out-ext4_generic_attr_show.patch
+ext4-fix-slab-out-of-bounds-in-ext4_mb_find_good_gro.patch
+eventfs-update-all-the-eventfs_inodes-from-the-event.patch
--- /dev/null
+From 3cc0e03cbc4e4bebe9c627aacac1a24192290b13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 01:02:16 +0800
+Subject: tcp: count CLOSE-WAIT sockets for TCP_MIB_CURRESTAB
+
+From: Jason Xing <kernelxing@tencent.com>
+
+[ Upstream commit a46d0ea5c94205f40ecf912d1bb7806a8a64704f ]
+
+According to RFC 1213, we should also take CLOSE-WAIT sockets into
+consideration:
+
+ "tcpCurrEstab OBJECT-TYPE
+ ...
+ The number of TCP connections for which the current state
+ is either ESTABLISHED or CLOSE- WAIT."
+
+After this, CurrEstab counter will display the total number of
+ESTABLISHED and CLOSE-WAIT sockets.
+
+The logic of counting
+When we increment the counter?
+a) if we change the state to ESTABLISHED.
+b) if we change the state from SYN-RECEIVED to CLOSE-WAIT.
+
+When we decrement the counter?
+a) if the socket leaves ESTABLISHED and will never go into CLOSE-WAIT,
+say, on the client side, changing from ESTABLISHED to FIN-WAIT-1.
+b) if the socket leaves CLOSE-WAIT, say, on the server side, changing
+from CLOSE-WAIT to LAST-ACK.
+
+Please note: there are two cases in which the old state of a socket can
+be changed to CLOSE-WAIT in tcp_fin(). One is SYN-RECV, the other is
+ESTABLISHED. So we have to take care of the former case.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jason Xing <kernelxing@tencent.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 5c79836e4c9e7..77ee1eda3fd86 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2640,6 +2640,10 @@ void tcp_set_state(struct sock *sk, int state)
+ if (oldstate != TCP_ESTABLISHED)
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+ break;
++ case TCP_CLOSE_WAIT:
++ if (oldstate == TCP_SYN_RECV)
++ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
++ break;
+
+ case TCP_CLOSE:
+ if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
+@@ -2651,7 +2655,7 @@ void tcp_set_state(struct sock *sk, int state)
+ inet_put_port(sk);
+ fallthrough;
+ default:
+- if (oldstate == TCP_ESTABLISHED)
++ if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
+ TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From fa7462e07ebbb3485d27396ff409ad3a857d8c9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 21:41:15 +0800
+Subject: virtio_net: fix possible dim status unrecoverable
+
+From: Heng Qi <hengqi@linux.alibaba.com>
+
+[ Upstream commit 9e0945b1901c9eed4fbee3b8a3870487b2bdc936 ]
+
+When the dim worker is scheduled, if it no longer needs to issue
+commands, dim may not be able to return to the working state later.
+
+For example, the following single queue scenario:
+ 1. The dim worker of rxq0 is scheduled, and the dim status is
+ changed to DIM_APPLY_NEW_PROFILE;
+ 2. dim is disabled or parameters have not been modified;
+ 3. virtnet_rx_dim_work exits directly;
+
+Then, even if net_dim is invoked again, it cannot work because the
+state is not restored to DIM_START_MEASURE.
+
+Fixes: 6208799553a8 ("virtio-net: support rx netdim")
+Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20240528134116.117426-2-hengqi@linux.alibaba.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 115c3c5414f2a..574b052a517d7 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -3589,10 +3589,10 @@ static void virtnet_rx_dim_work(struct work_struct *work)
+ if (err)
+ pr_debug("%s: Failed to send dim parameters on rxq%d\n",
+ dev->name, qnum);
+- dim->state = DIM_START_MEASURE;
+ }
+ }
+
++ dim->state = DIM_START_MEASURE;
+ rtnl_unlock();
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 674e702b2754d644787d02745495cf78f82a616a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 12:37:11 +0200
+Subject: vmxnet3: disable rx data ring on dma allocation failure
+
+From: Matthias Stocker <mstocker@barracuda.com>
+
+[ Upstream commit ffbe335b8d471f79b259e950cb20999700670456 ]
+
+When vmxnet3_rq_create() fails to allocate memory for rq->data_ring.base,
+the subsequent call to vmxnet3_rq_destroy_all_rxdataring does not reset
+rq->data_ring.desc_size for the data ring that failed, which presumably
+causes the hypervisor to reference it on packet reception.
+
+To fix this bug, rq->data_ring.desc_size needs to be set to 0 to tell
+the hypervisor to disable this feature.
+
+[ 95.436876] kernel BUG at net/core/skbuff.c:207!
+[ 95.439074] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+[ 95.440411] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 6.9.3-dirty #1
+[ 95.441558] Hardware name: VMware, Inc. VMware Virtual
+Platform/440BX Desktop Reference Platform, BIOS 6.00 12/12/2018
+[ 95.443481] RIP: 0010:skb_panic+0x4d/0x4f
+[ 95.444404] Code: 4f 70 50 8b 87 c0 00 00 00 50 8b 87 bc 00 00 00 50
+ff b7 d0 00 00 00 4c 8b 8f c8 00 00 00 48 c7 c7 68 e8 be 9f e8 63 58 f9
+ff <0f> 0b 48 8b 14 24 48 c7 c1 d0 73 65 9f e8 a1 ff ff ff 48 8b 14 24
+[ 95.447684] RSP: 0018:ffffa13340274dd0 EFLAGS: 00010246
+[ 95.448762] RAX: 0000000000000089 RBX: ffff8fbbc72b02d0 RCX: 000000000000083f
+[ 95.450148] RDX: 0000000000000000 RSI: 00000000000000f6 RDI: 000000000000083f
+[ 95.451520] RBP: 000000000000002d R08: 0000000000000000 R09: ffffa13340274c60
+[ 95.452886] R10: ffffffffa04ed468 R11: 0000000000000002 R12: 0000000000000000
+[ 95.454293] R13: ffff8fbbdab3c2d0 R14: ffff8fbbdbd829e0 R15: ffff8fbbdbd809e0
+[ 95.455682] FS: 0000000000000000(0000) GS:ffff8fbeefd80000(0000) knlGS:0000000000000000
+[ 95.457178] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 95.458340] CR2: 00007fd0d1f650c8 CR3: 0000000115f28000 CR4: 00000000000406f0
+[ 95.459791] Call Trace:
+[ 95.460515] <IRQ>
+[ 95.461180] ? __die_body.cold+0x19/0x27
+[ 95.462150] ? die+0x2e/0x50
+[ 95.462976] ? do_trap+0xca/0x110
+[ 95.463973] ? do_error_trap+0x6a/0x90
+[ 95.464966] ? skb_panic+0x4d/0x4f
+[ 95.465901] ? exc_invalid_op+0x50/0x70
+[ 95.466849] ? skb_panic+0x4d/0x4f
+[ 95.467718] ? asm_exc_invalid_op+0x1a/0x20
+[ 95.468758] ? skb_panic+0x4d/0x4f
+[ 95.469655] skb_put.cold+0x10/0x10
+[ 95.470573] vmxnet3_rq_rx_complete+0x862/0x11e0 [vmxnet3]
+[ 95.471853] vmxnet3_poll_rx_only+0x36/0xb0 [vmxnet3]
+[ 95.473185] __napi_poll+0x2b/0x160
+[ 95.474145] net_rx_action+0x2c6/0x3b0
+[ 95.475115] handle_softirqs+0xe7/0x2a0
+[ 95.476122] __irq_exit_rcu+0x97/0xb0
+[ 95.477109] common_interrupt+0x85/0xa0
+[ 95.478102] </IRQ>
+[ 95.478846] <TASK>
+[ 95.479603] asm_common_interrupt+0x26/0x40
+[ 95.480657] RIP: 0010:pv_native_safe_halt+0xf/0x20
+[ 95.481801] Code: 22 d7 e9 54 87 01 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa eb 07 0f 00 2d 93 ba 3b 00 fb f4 <e9> 2c 87 01 00 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90
+[ 95.485563] RSP: 0018:ffffa133400ffe58 EFLAGS: 00000246
+[ 95.486882] RAX: 0000000000004000 RBX: ffff8fbbc1d14064 RCX: 0000000000000000
+[ 95.488477] RDX: ffff8fbeefd80000 RSI: ffff8fbbc1d14000 RDI: 0000000000000001
+[ 95.490067] RBP: ffff8fbbc1d14064 R08: ffffffffa0652260 R09: 00000000000010d3
+[ 95.491683] R10: 0000000000000018 R11: ffff8fbeefdb4764 R12: ffffffffa0652260
+[ 95.493389] R13: ffffffffa06522e0 R14: 0000000000000001 R15: 0000000000000000
+[ 95.495035] acpi_safe_halt+0x14/0x20
+[ 95.496127] acpi_idle_do_entry+0x2f/0x50
+[ 95.497221] acpi_idle_enter+0x7f/0xd0
+[ 95.498272] cpuidle_enter_state+0x81/0x420
+[ 95.499375] cpuidle_enter+0x2d/0x40
+[ 95.500400] do_idle+0x1e5/0x240
+[ 95.501385] cpu_startup_entry+0x29/0x30
+[ 95.502422] start_secondary+0x11c/0x140
+[ 95.503454] common_startup_64+0x13e/0x141
+[ 95.504466] </TASK>
+[ 95.505197] Modules linked in: nft_fib_inet nft_fib_ipv4
+nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6
+nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6
+nf_defrag_ipv4 rfkill ip_set nf_tables vsock_loopback
+vmw_vsock_virtio_transport_common qrtr vmw_vsock_vmci_transport vsock
+sunrpc binfmt_misc pktcdvd vmw_balloon pcspkr vmw_vmci i2c_piix4 joydev
+loop dm_multipath nfnetlink zram crct10dif_pclmul crc32_pclmul vmwgfx
+crc32c_intel polyval_clmulni polyval_generic ghash_clmulni_intel
+sha512_ssse3 sha256_ssse3 vmxnet3 sha1_ssse3 drm_ttm_helper vmw_pvscsi
+ttm ata_generic pata_acpi serio_raw scsi_dh_rdac scsi_dh_emc
+scsi_dh_alua ip6_tables ip_tables fuse
+[ 95.516536] ---[ end trace 0000000000000000 ]---
+
+Fixes: 6f4833383e85 ("net: vmxnet3: Fix NULL pointer dereference in vmxnet3_rq_rx_complete()")
+Signed-off-by: Matthias Stocker <mstocker@barracuda.com>
+Reviewed-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Reviewed-by: Ronak Doshi <ronak.doshi@broadcom.com>
+Link: https://lore.kernel.org/r/20240531103711.101961-1-mstocker@barracuda.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
+index 0578864792b60..beebe09eb88ff 100644
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -2034,8 +2034,8 @@ vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
+ rq->data_ring.base,
+ rq->data_ring.basePA);
+ rq->data_ring.base = NULL;
+- rq->data_ring.desc_size = 0;
+ }
++ rq->data_ring.desc_size = 0;
+ }
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 5c1f9432c8f793e0bd7d247d0932bf76e684033f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 11:08:10 +0300
+Subject: wifi: ath11k: fix WCN6750 firmware crash caused by 17 num_vdevs
+
+From: Carl Huang <quic_cjhuang@quicinc.com>
+
+[ Upstream commit ed281c6ab6eb8a914f06c74dfeaebde15b34a3f4 ]
+
+WCN6750 firmware crashes because of num_vdevs changed from 4 to 17
+in ath11k_init_wmi_config_qca6390() as the ab->hw_params.num_vdevs
+is 17. This is caused by commit f019f4dff2e4 ("wifi: ath11k: support
+2 station interfaces") which assigns ab->hw_params.num_vdevs directly
+to config->num_vdevs in ath11k_init_wmi_config_qca6390(), therefore
+WCN6750 firmware crashes as it can't support such a big num_vdevs.
+
+Fix it by assigning 3 to num_vdevs in hw_params for WCN6750 as 3 is
+sufficient too.
+
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-01371-QCAMSLSWPLZ-1
+
+Fixes: f019f4dff2e4 ("wifi: ath11k: support 2 station interfaces")
+Reported-by: Luca Weiss <luca.weiss@fairphone.com>
+Tested-by: Luca Weiss <luca.weiss@fairphone.com>
+Closes: https://lore.kernel.org/r/D15TIIDIIESY.D1EKKJLZINMA@fairphone.com/
+Signed-off-by: Carl Huang <quic_cjhuang@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://msgid.link/20240520030757.2209395-1-quic_cjhuang@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
+index c78bce19bd754..5d07585e59c17 100644
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -595,7 +595,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
+ .coldboot_cal_ftm = true,
+ .cbcal_restart_fw = false,
+ .fw_mem_mode = 0,
+- .num_vdevs = 16 + 1,
++ .num_vdevs = 3,
+ .num_peers = 512,
+ .supports_suspend = false,
+ .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
+--
+2.43.0
+
--- /dev/null
+From 64482b6ba44828639a5ce1aef2b0260d1e3a50cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 11:08:11 +0300
+Subject: wifi: ath11k: move power type check to ASSOC stage when connecting to
+ 6 GHz AP
+
+From: Baochen Qiang <quic_bqiang@quicinc.com>
+
+[ Upstream commit 6e16782d6b4a724f9c9dcd49471219643593b60c ]
+
+With commit bc8a0fac8677 ("wifi: mac80211: don't set bss_conf in parsing")
+ath11k fails to connect to 6 GHz AP.
+
+This is because currently ath11k checks AP's power type in
+ath11k_mac_op_assign_vif_chanctx() which would be called in AUTH stage.
+However with above commit power type is not available until ASSOC stage.
+As a result power type check fails and therefore connection fails.
+
+Fix this by moving power type check to ASSOC stage, also move regulatory
+rules update there because it depends on power type.
+
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30
+
+Fixes: bc8a0fac8677 ("wifi: mac80211: don't set bss_conf in parsing")
+Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
+Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://msgid.link/20240424064019.4847-1-quic_bqiang@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/mac.c | 38 ++++++++++++++++++---------
+ 1 file changed, 25 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index 2fca415322aec..790277f547bb4 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -7889,8 +7889,6 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
+ struct ath11k_base *ab = ar->ab;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ int ret;
+- struct cur_regulatory_info *reg_info;
+- enum ieee80211_ap_reg_power power_type;
+
+ mutex_lock(&ar->conf_mutex);
+
+@@ -7901,17 +7899,6 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
+ if (ath11k_wmi_supports_6ghz_cc_ext(ar) &&
+ ctx->def.chan->band == NL80211_BAND_6GHZ &&
+ arvif->vdev_type == WMI_VDEV_TYPE_STA) {
+- reg_info = &ab->reg_info_store[ar->pdev_idx];
+- power_type = vif->bss_conf.power_type;
+-
+- ath11k_dbg(ab, ATH11K_DBG_MAC, "chanctx power type %d\n", power_type);
+-
+- if (power_type == IEEE80211_REG_UNSET_AP) {
+- ret = -EINVAL;
+- goto out;
+- }
+-
+- ath11k_reg_handle_chan_list(ab, reg_info, power_type);
+ arvif->chanctx = *ctx;
+ ath11k_mac_parse_tx_pwr_env(ar, vif, ctx);
+ }
+@@ -9525,6 +9512,8 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
+ struct ath11k *ar = hw->priv;
+ struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+ struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
++ enum ieee80211_ap_reg_power power_type;
++ struct cur_regulatory_info *reg_info;
+ struct ath11k_peer *peer;
+ int ret = 0;
+
+@@ -9604,6 +9593,29 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
+ ath11k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n",
+ sta->addr, arvif->vdev_id, ret);
+ }
++
++ if (!ret &&
++ ath11k_wmi_supports_6ghz_cc_ext(ar) &&
++ arvif->vdev_type == WMI_VDEV_TYPE_STA &&
++ arvif->chanctx.def.chan &&
++ arvif->chanctx.def.chan->band == NL80211_BAND_6GHZ) {
++ reg_info = &ar->ab->reg_info_store[ar->pdev_idx];
++ power_type = vif->bss_conf.power_type;
++
++ if (power_type == IEEE80211_REG_UNSET_AP) {
++ ath11k_warn(ar->ab, "invalid power type %d\n",
++ power_type);
++ ret = -EINVAL;
++ } else {
++ ret = ath11k_reg_handle_chan_list(ar->ab,
++ reg_info,
++ power_type);
++ if (ret)
++ ath11k_warn(ar->ab,
++ "failed to handle chan list with power type %d\n",
++ power_type);
++ }
++ }
+ } else if (old_state == IEEE80211_STA_AUTHORIZED &&
+ new_state == IEEE80211_STA_ASSOC) {
+ spin_lock_bh(&ar->ab->base_lock);
+--
+2.43.0
+
--- /dev/null
+From 5f33150423573345b668e76c049425a511ea7559 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 12:41:25 +0200
+Subject: wifi: cfg80211: fully move wiphy work to unbound workqueue
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit e296c95eac655008d5a709b8cf54d0018da1c916 ]
+
+Previously I had moved the wiphy work to the unbound
+system workqueue, but missed that when it restarts and
+during resume it was still using the normal system
+workqueue. Fix that.
+
+Fixes: 91d20ab9d9ca ("wifi: cfg80211: use system_unbound_wq for wiphy work")
+Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240522124126.7ca959f2cbd3.I3e2a71ef445d167b84000ccf934ea245aef8d395@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/core.c | 2 +-
+ net/wireless/sysfs.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/wireless/core.c b/net/wireless/core.c
+index 3fb1b637352a9..4b1f45e3070e0 100644
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work)
+ if (wk) {
+ list_del_init(&wk->entry);
+ if (!list_empty(&rdev->wiphy_work_list))
+- schedule_work(work);
++ queue_work(system_unbound_wq, work);
+ spin_unlock_irq(&rdev->wiphy_work_lock);
+
+ wk->func(&rdev->wiphy, wk);
+diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
+index 565511a3f461e..62f26618f6747 100644
+--- a/net/wireless/sysfs.c
++++ b/net/wireless/sysfs.c
+@@ -5,7 +5,7 @@
+ *
+ * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+- * Copyright (C) 2020-2021, 2023 Intel Corporation
++ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
+ */
+
+ #include <linux/device.h>
+@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev)
+ if (rdev->wiphy.registered && rdev->ops->resume)
+ ret = rdev_resume(rdev);
+ rdev->suspended = false;
+- schedule_work(&rdev->wiphy_work);
++ queue_work(system_unbound_wq, &rdev->wiphy_work);
+ wiphy_unlock(&rdev->wiphy);
+
+ if (ret)
+--
+2.43.0
+
--- /dev/null
+From 2489c170dbfe056502e91905feec18a8e7bb01c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 21:47:26 +0200
+Subject: wifi: cfg80211: Lock wiphy in cfg80211_get_station
+
+From: Remi Pommarel <repk@triplefau.lt>
+
+[ Upstream commit 642f89daa34567d02f312d03e41523a894906dae ]
+
+Wiphy should be locked before calling rdev_get_station() (see lockdep
+assert in ieee80211_get_station()).
+
+This fixes the following kernel NULL dereference:
+
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
+ Mem abort info:
+ ESR = 0x0000000096000006
+ EC = 0x25: DABT (current EL), IL = 32 bits
+ SET = 0, FnV = 0
+ EA = 0, S1PTW = 0
+ FSC = 0x06: level 2 translation fault
+ Data abort info:
+ ISV = 0, ISS = 0x00000006
+ CM = 0, WnR = 0
+ user pgtable: 4k pages, 48-bit VAs, pgdp=0000000003001000
+ [0000000000000050] pgd=0800000002dca003, p4d=0800000002dca003, pud=08000000028e9003, pmd=0000000000000000
+ Internal error: Oops: 0000000096000006 [#1] SMP
+ Modules linked in: netconsole dwc3_meson_g12a dwc3_of_simple dwc3 ip_gre gre ath10k_pci ath10k_core ath9k ath9k_common ath9k_hw ath
+ CPU: 0 PID: 1091 Comm: kworker/u8:0 Not tainted 6.4.0-02144-g565f9a3a7911-dirty #705
+ Hardware name: RPT (r1) (DT)
+ Workqueue: bat_events batadv_v_elp_throughput_metric_update
+ pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : ath10k_sta_statistics+0x10/0x2dc [ath10k_core]
+ lr : sta_set_sinfo+0xcc/0xbd4
+ sp : ffff000007b43ad0
+ x29: ffff000007b43ad0 x28: ffff0000071fa900 x27: ffff00000294ca98
+ x26: ffff000006830880 x25: ffff000006830880 x24: ffff00000294c000
+ x23: 0000000000000001 x22: ffff000007b43c90 x21: ffff800008898acc
+ x20: ffff00000294c6e8 x19: ffff000007b43c90 x18: 0000000000000000
+ x17: 445946354d552d78 x16: 62661f7200000000 x15: 57464f445946354d
+ x14: 0000000000000000 x13: 00000000000000e3 x12: d5f0acbcebea978e
+ x11: 00000000000000e3 x10: 000000010048fe41 x9 : 0000000000000000
+ x8 : ffff000007b43d90 x7 : 000000007a1e2125 x6 : 0000000000000000
+ x5 : ffff0000024e0900 x4 : ffff800000a0250c x3 : ffff000007b43c90
+ x2 : ffff00000294ca98 x1 : ffff000006831920 x0 : 0000000000000000
+ Call trace:
+ ath10k_sta_statistics+0x10/0x2dc [ath10k_core]
+ sta_set_sinfo+0xcc/0xbd4
+ ieee80211_get_station+0x2c/0x44
+ cfg80211_get_station+0x80/0x154
+ batadv_v_elp_get_throughput+0x138/0x1fc
+ batadv_v_elp_throughput_metric_update+0x1c/0xa4
+ process_one_work+0x1ec/0x414
+ worker_thread+0x70/0x46c
+ kthread+0xdc/0xe0
+ ret_from_fork+0x10/0x20
+ Code: a9bb7bfd 910003fd a90153f3 f9411c40 (f9402814)
+
+This happens because STA has time to disconnect and reconnect before
+batadv_v_elp_throughput_metric_update() delayed work gets scheduled. In
+this situation, ath10k_sta_state() can be in the middle of resetting
+arsta data when the work queue get chance to be scheduled and ends up
+accessing it. Locking wiphy prevents that.
+
+Fixes: 7406353d43c8 ("cfg80211: implement cfg80211_get_station cfg80211 API")
+Signed-off-by: Remi Pommarel <repk@triplefau.lt>
+Reviewed-by: Nicolas Escande <nico.escande@gmail.com>
+Acked-by: Antonio Quartulli <a@unstable.cc>
+Link: https://msgid.link/983b24a6a176e0800c01aedcd74480d9b551cb13.1716046653.git.repk@triplefau.lt
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/util.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 2bde8a3546313..082c6f9c5416e 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -2549,6 +2549,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+ {
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
++ int ret;
+
+ wdev = dev->ieee80211_ptr;
+ if (!wdev)
+@@ -2560,7 +2561,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+
+ memset(sinfo, 0, sizeof(*sinfo));
+
+- return rdev_get_station(rdev, dev, mac_addr, sinfo);
++ wiphy_lock(&rdev->wiphy);
++ ret = rdev_get_station(rdev, dev, mac_addr, sinfo);
++ wiphy_unlock(&rdev->wiphy);
++
++ return ret;
+ }
+ EXPORT_SYMBOL(cfg80211_get_station);
+
+--
+2.43.0
+
--- /dev/null
+From a16b90b60aa64959ffb246d6c6b6e86bbb970ccf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 15:50:59 +0800
+Subject: wifi: cfg80211: pmsr: use correct nla_get_uX functions
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit ab904521f4de52fef4f179d2dfc1877645ef5f5c ]
+
+The commit 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM
+initiator API") defines four attributes NL80211_PMSR_FTM_REQ_ATTR_
+{NUM_BURSTS_EXP}/{BURST_PERIOD}/{BURST_DURATION}/{FTMS_PER_BURST} in
+following ways.
+
+static const struct nla_policy
+nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
+ ...
+ [NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP] =
+ NLA_POLICY_MAX(NLA_U8, 15),
+ [NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 },
+ [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
+ NLA_POLICY_MAX(NLA_U8, 15),
+ [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
+ NLA_POLICY_MAX(NLA_U8, 31),
+ ...
+};
+
+That is, those attributes are expected to be NLA_U8 and NLA_U16 types.
+However, the consumers of these attributes in `pmsr_parse_ftm` blindly
+all use `nla_get_u32`, which is incorrect and causes functionality issues
+on little-endian platforms. Hence, fix them with the correct `nla_get_u8`
+and `nla_get_u16` functions.
+
+Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Link: https://msgid.link/20240521075059.47999-1-linma@zju.edu.cn
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/pmsr.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
+index e106dcea39778..c569c37da3175 100644
+--- a/net/wireless/pmsr.c
++++ b/net/wireless/pmsr.c
+@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
+ out->ftm.burst_period = 0;
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
+ out->ftm.burst_period =
+- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
++ nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+
+ out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
+ if (out->ftm.asap && !capa->ftm.asap) {
+@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
+ out->ftm.num_bursts_exp = 0;
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
+ out->ftm.num_bursts_exp =
+- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+
+ if (capa->ftm.max_bursts_exponent >= 0 &&
+ out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
+@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
+ out->ftm.burst_duration = 15;
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
+ out->ftm.burst_duration =
+- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+
+ out->ftm.ftms_per_burst = 0;
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
+@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
+ out->ftm.ftmr_retries = 3;
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
+ out->ftm.ftmr_retries =
+- nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
++ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+
+ out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
+ if (out->ftm.request_lci && !capa->ftm.request_lci) {
+--
+2.43.0
+
--- /dev/null
+From 575287bb2f851544cc9a955432fbd31b6745ae5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 May 2024 17:06:39 +0300
+Subject: wifi: iwlwifi: dbg_ini: move iwl_dbg_tlv_free outside of debugfs
+ ifdef
+
+From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
+
+[ Upstream commit 87821b67dea87addbc4ab093ba752753b002176a ]
+
+The driver should call iwl_dbg_tlv_free even if debugfs is not defined
+since ini mode does not depend on debugfs ifdef.
+
+Fixes: 68f6f492c4fa ("iwlwifi: trans: support loading ini TLVs from external file")
+Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
+Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240510170500.c8e3723f55b0.I5e805732b0be31ee6b83c642ec652a34e974ff10@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+index 1b7254569a37a..6c27ef2f7c7e5 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+@@ -1821,8 +1821,8 @@ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans)
+ err_fw:
+ #ifdef CONFIG_IWLWIFI_DEBUGFS
+ debugfs_remove_recursive(drv->dbgfs_drv);
+- iwl_dbg_tlv_free(drv->trans);
+ #endif
++ iwl_dbg_tlv_free(drv->trans);
+ kfree(drv);
+ err:
+ return ERR_PTR(ret);
+--
+2.43.0
+
--- /dev/null
+From 49ba92241a427db123dd765081002b9b8c3e2587 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 May 2024 13:27:12 +0300
+Subject: wifi: iwlwifi: mvm: check n_ssids before accessing the ssids
+
+From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+
+[ Upstream commit 60d62757df30b74bf397a2847a6db7385c6ee281 ]
+
+In some versions of cfg80211, the ssids pointer might be a valid one even
+though n_ssids is 0. Accessing the pointer in this case will cause an
+out-of-bound access. Fix this by checking n_ssids first.
+
+Fixes: c1a7515393e4 ("iwlwifi: mvm: add adaptive dwell support")
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Reviewed-by: Ilan Peer <ilan.peer@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Link: https://msgid.link/20240513132416.6e4d1762bf0d.I5a0e6cc8f02050a766db704d15594c61fe583d45@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+index 22bc032cffc8b..525d8efcc1475 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+@@ -1303,7 +1303,7 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
+ if (IWL_MVM_ADWELL_MAX_BUDGET)
+ cmd->v7.adwell_max_budget =
+ cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET);
+- else if (params->ssids && params->ssids[0].ssid_len)
++ else if (params->n_ssids && params->ssids[0].ssid_len)
+ cmd->v7.adwell_max_budget =
+ cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN);
+ else
+@@ -1405,7 +1405,7 @@ iwl_mvm_scan_umac_dwell_v11(struct iwl_mvm *mvm,
+ if (IWL_MVM_ADWELL_MAX_BUDGET)
+ general_params->adwell_max_budget =
+ cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET);
+- else if (params->ssids && params->ssids[0].ssid_len)
++ else if (params->n_ssids && params->ssids[0].ssid_len)
+ general_params->adwell_max_budget =
+ cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN);
+ else
+--
+2.43.0
+
--- /dev/null
+From 971a1446d382ea69a4e32bcd797df2b570738aef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 12 May 2024 15:25:00 +0300
+Subject: wifi: iwlwifi: mvm: don't initialize csa_work twice
+
+From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+
+[ Upstream commit 92158790ce4391ce4c35d8dfbce759195e4724cb ]
+
+The initialization of this worker moved to iwl_mvm_mac_init_mvmvif
+but we removed only from the pre-MLD version of the add_interface
+callback. Remove it also from the MLD version.
+
+Fixes: 0bcc2155983e ("wifi: iwlwifi: mvm: init vif works only once")
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Link: https://msgid.link/20240512152312.4f15b41604f0.Iec912158e5a706175531d3736d77d25adf02fba4@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+index df183a79db4c8..43f3002ede464 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+@@ -75,8 +75,6 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw,
+ goto out_free_bf;
+
+ iwl_mvm_tcm_add_vif(mvm, vif);
+- INIT_DELAYED_WORK(&mvmvif->csa_work,
+- iwl_mvm_channel_switch_disconnect_wk);
+
+ if (vif->type == NL80211_IFTYPE_MONITOR) {
+ mvm->monitor_on = true;
+--
+2.43.0
+
--- /dev/null
+From 9326cd066e23328a9d6178bd072fe1830e06416e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 May 2024 13:27:14 +0300
+Subject: wifi: iwlwifi: mvm: don't read past the mfuart notification
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+[ Upstream commit 4bb95f4535489ed830cf9b34b0a891e384d1aee4 ]
+
+In case the firmware sends a notification that claims it has more data
+than it has, we will read past what was allocated for the notification.
+Remove the print of the buffer, we won't see it by default. If needed,
+we can see the content with tracing.
+
+This was reported by KFENCE.
+
+Fixes: bdccdb854f2f ("iwlwifi: mvm: support MFUART dump in case of MFUART assert")
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240513132416.ba82a01a559e.Ia91dd20f5e1ca1ad380b95e68aebf2794f553d9b@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+index e1c2b7fc92ab9..c56212c2c3066 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+@@ -94,20 +94,10 @@ void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm,
+ {
+ struct iwl_rx_packet *pkt = rxb_addr(rxb);
+ struct iwl_mfu_assert_dump_notif *mfu_dump_notif = (void *)pkt->data;
+- __le32 *dump_data = mfu_dump_notif->data;
+- int n_words = le32_to_cpu(mfu_dump_notif->data_size) / sizeof(__le32);
+- int i;
+
+ if (mfu_dump_notif->index_num == 0)
+ IWL_INFO(mvm, "MFUART assert id 0x%x occurred\n",
+ le32_to_cpu(mfu_dump_notif->assert_id));
+-
+- for (i = 0; i < n_words; i++)
+- IWL_DEBUG_INFO(mvm,
+- "MFUART assert dump, dword %u: 0x%08x\n",
+- le16_to_cpu(mfu_dump_notif->index_num) *
+- n_words + i,
+- le32_to_cpu(dump_data[i]));
+ }
+
+ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait,
+--
+2.43.0
+
--- /dev/null
+From 3a97cf624678ac9012dbe1d151ca666ef5d2c332 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 May 2024 17:06:33 +0300
+Subject: wifi: iwlwifi: mvm: revert gen2 TX A-MPDU size to 64
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 4a7aace2899711592327463c1a29ffee44fcc66e ]
+
+We don't actually support >64 even for HE devices, so revert
+back to 64. This fixes an issue where the session is refused
+because the queue is configured differently from the actual
+session later.
+
+Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Reviewed-by: Liad Kaufman <liad.kaufman@intel.com>
+Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240510170500.52f7b4cf83aa.If47e43adddf7fe250ed7f5571fbb35d8221c7c47@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/rs.h | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
+index 376b23b409dca..6cd4ec4d8f344 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
+@@ -122,13 +122,8 @@ enum {
+
+ #define LINK_QUAL_AGG_FRAME_LIMIT_DEF (63)
+ #define LINK_QUAL_AGG_FRAME_LIMIT_MAX (63)
+-/*
+- * FIXME - various places in firmware API still use u8,
+- * e.g. LQ command and SCD config command.
+- * This should be 256 instead.
+- */
+-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (255)
+-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (255)
++#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (64)
++#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (64)
+ #define LINK_QUAL_AGG_FRAME_LIMIT_MIN (0)
+
+ #define LQ_SIZE 2 /* 2 mode tables: "Active" and "Search" */
+--
+2.43.0
+
--- /dev/null
+From aebe367d44a11a5e40d8710ad0a0af86b660e8ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 May 2024 17:06:35 +0300
+Subject: wifi: iwlwifi: mvm: set properly mac header
+
+From: Mordechay Goodstein <mordechay.goodstein@intel.com>
+
+[ Upstream commit 0f2e9f6f21d1ff292363cdfb5bc4d492eeaff76e ]
+
+In the driver we only use skb_put* for adding data to the skb, hence data
+never moves and skb_reset_mac_header would set mac_header to the first
+place data was added and not to the mac80211 header; fix this by using the
+actual len of bytes added for setting the mac header.
+
+Fixes: 3f7a9d577d47 ("wifi: iwlwifi: mvm: simplify by using SKB MAC header pointer")
+Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240510170500.12f2de2909c3.I72a819b96f2fe55bde192a8fd31a4b96c301aa73@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+index ce8d83c771a70..8ac5c045fcfcb 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+@@ -2456,8 +2456,11 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
+ *
+ * We mark it as mac header, for upper layers to know where
+ * all radio tap header ends.
++ *
++ * Since data doesn't move data while putting data on skb and that is
++ * the only way we use, data + len is the next place that hdr would be put
+ */
+- skb_reset_mac_header(skb);
++ skb_set_mac_header(skb, skb->len);
+
+ /*
+ * Override the nss from the rx_vec since the rate_n_flags has
+--
+2.43.0
+
--- /dev/null
+From c0898526e089abd2eea2f3792607688518b48c0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 May 2024 10:18:54 +0800
+Subject: wifi: mac80211: correctly parse Spatial Reuse Parameter Set element
+
+From: Lingbo Kong <quic_lingbok@quicinc.com>
+
+[ Upstream commit a26d8dc5227f449a54518a8b40733a54c6600a8b ]
+
+Currently, the way of parsing Spatial Reuse Parameter Set element is
+incorrect and some members of struct ieee80211_he_obss_pd are not assigned.
+
+To address this issue, it must be parsed in the order of the elements of
+Spatial Reuse Parameter Set defined in the IEEE Std 802.11ax specification.
+
+The diagram of the Spatial Reuse Parameter Set element (IEEE Std 802.11ax
+-2021-9.4.2.252).
+
+-------------------------------------------------------------------------
+| | | | |Non-SRG| SRG | SRG | SRG | SRG |
+|Element|Length| Element | SR |OBSS PD|OBSS PD|OBSS PD| BSS |Partial|
+| ID | | ID |Control| Max | Min | Max |Color | BSSID |
+| | |Extension| | Offset| Offset|Offset |Bitmap|Bitmap |
+-------------------------------------------------------------------------
+
+Fixes: 1ced169cc1c2 ("mac80211: allow setting spatial reuse parameters from bss_conf")
+Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
+Link: https://msgid.link/20240516021854.5682-3-quic_lingbok@quicinc.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/he.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/net/mac80211/he.c b/net/mac80211/he.c
+index 9f5ffdc9db284..ecbb042dd0433 100644
+--- a/net/mac80211/he.c
++++ b/net/mac80211/he.c
+@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+
+ if (!he_spr_ie_elem)
+ return;
++
++ he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control;
+ data = he_spr_ie_elem->optional;
+
+ if (he_spr_ie_elem->he_sr_control &
+ IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
+- data++;
++ he_obss_pd->non_srg_max_offset = *data++;
++
+ if (he_spr_ie_elem->he_sr_control &
+ IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
+- he_obss_pd->max_offset = *data++;
+ he_obss_pd->min_offset = *data++;
++ he_obss_pd->max_offset = *data++;
++ memcpy(he_obss_pd->bss_color_bitmap, data, 8);
++ data += 8;
++ memcpy(he_obss_pd->partial_bssid_bitmap, data, 8);
+ he_obss_pd->enable = true;
+ }
+ }
+--
+2.43.0
+
--- /dev/null
+From da12835ef99f313ed6091216c399ea536e0217c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 08:57:53 +0200
+Subject: wifi: mac80211: Fix deadlock in ieee80211_sta_ps_deliver_wakeup()
+
+From: Remi Pommarel <repk@triplefau.lt>
+
+[ Upstream commit 44c06bbde6443de206b30f513100b5670b23fc5e ]
+
+The ieee80211_sta_ps_deliver_wakeup() function takes sta->ps_lock to
+synchronize with ieee80211_tx_h_unicast_ps_buf() which is called from
+softirq context. However, using only spin_lock() to get sta->ps_lock in
+ieee80211_sta_ps_deliver_wakeup() does not prevent a softirq from executing
+on this same CPU, running ieee80211_tx_h_unicast_ps_buf() and trying to
+take this same lock, ending in deadlock. Below is an example of an rcu
+stall that arises in such a situation.
+
+ rcu: INFO: rcu_sched self-detected stall on CPU
+ rcu: 2-....: (42413413 ticks this GP) idle=b154/1/0x4000000000000000 softirq=1763/1765 fqs=21206996
+ rcu: (t=42586894 jiffies g=2057 q=362405 ncpus=4)
+ CPU: 2 PID: 719 Comm: wpa_supplicant Tainted: G W 6.4.0-02158-g1b062f552873 #742
+ Hardware name: RPT (r1) (DT)
+ pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : queued_spin_lock_slowpath+0x58/0x2d0
+ lr : invoke_tx_handlers_early+0x5b4/0x5c0
+ sp : ffff00001ef64660
+ x29: ffff00001ef64660 x28: ffff000009bc1070 x27: ffff000009bc0ad8
+ x26: ffff000009bc0900 x25: ffff00001ef647a8 x24: 0000000000000000
+ x23: ffff000009bc0900 x22: ffff000009bc0900 x21: ffff00000ac0e000
+ x20: ffff00000a279e00 x19: ffff00001ef646e8 x18: 0000000000000000
+ x17: ffff800016468000 x16: ffff00001ef608c0 x15: 0010533c93f64f80
+ x14: 0010395c9faa3946 x13: 0000000000000000 x12: 00000000fa83b2da
+ x11: 000000012edeceea x10: ffff0000010fbe00 x9 : 0000000000895440
+ x8 : 000000000010533c x7 : ffff00000ad8b740 x6 : ffff00000c350880
+ x5 : 0000000000000007 x4 : 0000000000000001 x3 : 0000000000000000
+ x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffff00000ac0e0e8
+ Call trace:
+ queued_spin_lock_slowpath+0x58/0x2d0
+ ieee80211_tx+0x80/0x12c
+ ieee80211_tx_pending+0x110/0x278
+ tasklet_action_common.constprop.0+0x10c/0x144
+ tasklet_action+0x20/0x28
+ _stext+0x11c/0x284
+ ____do_softirq+0xc/0x14
+ call_on_irq_stack+0x24/0x34
+ do_softirq_own_stack+0x18/0x20
+ do_softirq+0x74/0x7c
+ __local_bh_enable_ip+0xa0/0xa4
+ _ieee80211_wake_txqs+0x3b0/0x4b8
+ __ieee80211_wake_queue+0x12c/0x168
+ ieee80211_add_pending_skbs+0xec/0x138
+ ieee80211_sta_ps_deliver_wakeup+0x2a4/0x480
+ ieee80211_mps_sta_status_update.part.0+0xd8/0x11c
+ ieee80211_mps_sta_status_update+0x18/0x24
+ sta_apply_parameters+0x3bc/0x4c0
+ ieee80211_change_station+0x1b8/0x2dc
+ nl80211_set_station+0x444/0x49c
+ genl_family_rcv_msg_doit.isra.0+0xa4/0xfc
+ genl_rcv_msg+0x1b0/0x244
+ netlink_rcv_skb+0x38/0x10c
+ genl_rcv+0x34/0x48
+ netlink_unicast+0x254/0x2bc
+ netlink_sendmsg+0x190/0x3b4
+ ____sys_sendmsg+0x1e8/0x218
+ ___sys_sendmsg+0x68/0x8c
+ __sys_sendmsg+0x44/0x84
+ __arm64_sys_sendmsg+0x20/0x28
+ do_el0_svc+0x6c/0xe8
+ el0_svc+0x14/0x48
+ el0t_64_sync_handler+0xb0/0xb4
+ el0t_64_sync+0x14c/0x150
+
+Using spin_lock_bh()/spin_unlock_bh() instead prevents softirqs from being
+raised on the same CPU that is holding the lock.
+
+Fixes: 1d147bfa6429 ("mac80211: fix AP powersave TX vs. wakeup race")
+Signed-off-by: Remi Pommarel <repk@triplefau.lt>
+Link: https://msgid.link/8e36fe07d0fbc146f89196cd47a53c8a0afe84aa.1716910344.git.repk@triplefau.lt
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/sta_info.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
+index da5fdd6f5c852..aa22f09e6d145 100644
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
+ skb_queue_head_init(&pending);
+
+ /* sync with ieee80211_tx_h_unicast_ps_buf */
+- spin_lock(&sta->ps_lock);
++ spin_lock_bh(&sta->ps_lock);
+ /* Send all buffered frames to the station */
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ int count = skb_queue_len(&pending), tmp;
+@@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
+ */
+ clear_sta_flag(sta, WLAN_STA_PSPOLL);
+ clear_sta_flag(sta, WLAN_STA_UAPSD);
+- spin_unlock(&sta->ps_lock);
++ spin_unlock_bh(&sta->ps_lock);
+
+ atomic_dec(&ps->num_sta_ps);
+
+--
+2.43.0
+
--- /dev/null
+From a9decd350d4209e2163dca231bd3e6dd54008372 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 May 2024 10:18:53 +0800
+Subject: wifi: mac80211: fix Spatial Reuse element size check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lingbo Kong <quic_lingbok@quicinc.com>
+
+[ Upstream commit 0c2fd18f7ec552796179c14f13a0e06942f09d16 ]
+
+Currently, the way to check the size of Spatial Reuse IE data in the
+ieee80211_parse_extension_element() is incorrect.
+
+This is because the len variable in the ieee80211_parse_extension_element()
+function is equal to the size of Spatial Reuse IE data minus one and the
+value returned by the ieee80211_he_spr_size() function is equal to
+the length of Spatial Reuse IE data. So the result of the
+len >= ieee80211_he_spr_size(data) statement is always false.
+
+To address this issue and make it consistent with the logic used elsewhere
+with ieee80211_he_oper_size(), change the
+"len >= ieee80211_he_spr_size(data)" to
+“len >= ieee80211_he_spr_size(data) - 1”.
+
+Fixes: 9d0480a7c05b ("wifi: mac80211: move element parsing to a new file")
+Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
+Link: https://msgid.link/20240516021854.5682-2-quic_lingbok@quicinc.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/parse.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
+index 55e5497f89781..055a60e90979b 100644
+--- a/net/mac80211/parse.c
++++ b/net/mac80211/parse.c
+@@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc,
+ if (params->mode < IEEE80211_CONN_MODE_HE)
+ break;
+ if (len >= sizeof(*elems->he_spr) &&
+- len >= ieee80211_he_spr_size(data))
++ len >= ieee80211_he_spr_size(data) - 1)
+ elems->he_spr = data;
+ break;
+ case WLAN_EID_EXT_HE_6GHZ_CAPA:
+--
+2.43.0
+
--- /dev/null
+From c864f76074b9065541e9e9d85af8fc8c3b473e35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 16:26:05 +0200
+Subject: wifi: mac80211: mesh: Fix leak of mesh_preq_queue objects
+
+From: Nicolas Escande <nico.escande@gmail.com>
+
+[ Upstream commit b7d7f11a291830fdf69d3301075dd0fb347ced84 ]
+
+The hwmp code uses objects of type mesh_preq_queue, added to a list in
+ieee80211_if_mesh, to keep track of the mpaths we need to resolve. If the
+mpath gets deleted, e.g. when the mesh interface is removed, the entries in
+that list will never get cleaned up. Fix this by flushing all corresponding
+items of the preq_queue in mesh_path_flush_pending().
+
+This should take care of KASAN reports like this:
+
+unreferenced object 0xffff00000668d800 (size 128):
+ comm "kworker/u8:4", pid 67, jiffies 4295419552 (age 1836.444s)
+ hex dump (first 32 bytes):
+ 00 1f 05 09 00 00 ff ff 00 d5 68 06 00 00 ff ff ..........h.....
+ 8e 97 ea eb 3e b8 01 00 00 00 00 00 00 00 00 00 ....>...........
+ backtrace:
+ [<000000007302a0b6>] __kmem_cache_alloc_node+0x1e0/0x35c
+ [<00000000049bd418>] kmalloc_trace+0x34/0x80
+ [<0000000000d792bb>] mesh_queue_preq+0x44/0x2a8
+ [<00000000c99c3696>] mesh_nexthop_resolve+0x198/0x19c
+ [<00000000926bf598>] ieee80211_xmit+0x1d0/0x1f4
+ [<00000000fc8c2284>] __ieee80211_subif_start_xmit+0x30c/0x764
+ [<000000005926ee38>] ieee80211_subif_start_xmit+0x9c/0x7a4
+ [<000000004c86e916>] dev_hard_start_xmit+0x174/0x440
+ [<0000000023495647>] __dev_queue_xmit+0xe24/0x111c
+ [<00000000cfe9ca78>] batadv_send_skb_packet+0x180/0x1e4
+ [<000000007bacc5d5>] batadv_v_elp_periodic_work+0x2f4/0x508
+ [<00000000adc3cd94>] process_one_work+0x4b8/0xa1c
+ [<00000000b36425d1>] worker_thread+0x9c/0x634
+ [<0000000005852dd5>] kthread+0x1bc/0x1c4
+ [<000000005fccd770>] ret_from_fork+0x10/0x20
+unreferenced object 0xffff000009051f00 (size 128):
+ comm "kworker/u8:4", pid 67, jiffies 4295419553 (age 1836.440s)
+ hex dump (first 32 bytes):
+ 90 d6 92 0d 00 00 ff ff 00 d8 68 06 00 00 ff ff ..........h.....
+ 36 27 92 e4 02 e0 01 00 00 58 79 06 00 00 ff ff 6'.......Xy.....
+ backtrace:
+ [<000000007302a0b6>] __kmem_cache_alloc_node+0x1e0/0x35c
+ [<00000000049bd418>] kmalloc_trace+0x34/0x80
+ [<0000000000d792bb>] mesh_queue_preq+0x44/0x2a8
+ [<00000000c99c3696>] mesh_nexthop_resolve+0x198/0x19c
+ [<00000000926bf598>] ieee80211_xmit+0x1d0/0x1f4
+ [<00000000fc8c2284>] __ieee80211_subif_start_xmit+0x30c/0x764
+ [<000000005926ee38>] ieee80211_subif_start_xmit+0x9c/0x7a4
+ [<000000004c86e916>] dev_hard_start_xmit+0x174/0x440
+ [<0000000023495647>] __dev_queue_xmit+0xe24/0x111c
+ [<00000000cfe9ca78>] batadv_send_skb_packet+0x180/0x1e4
+ [<000000007bacc5d5>] batadv_v_elp_periodic_work+0x2f4/0x508
+ [<00000000adc3cd94>] process_one_work+0x4b8/0xa1c
+ [<00000000b36425d1>] worker_thread+0x9c/0x634
+ [<0000000005852dd5>] kthread+0x1bc/0x1c4
+ [<000000005fccd770>] ret_from_fork+0x10/0x20
+
+Fixes: 050ac52cbe1f ("mac80211: code for on-demand Hybrid Wireless Mesh Protocol")
+Signed-off-by: Nicolas Escande <nico.escande@gmail.com>
+Link: https://msgid.link/20240528142605.1060566-1-nico.escande@gmail.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/mesh_pathtbl.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
+index a6b62169f0848..c0a5c75cddcb9 100644
+--- a/net/mac80211/mesh_pathtbl.c
++++ b/net/mac80211/mesh_pathtbl.c
+@@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
+ */
+ void mesh_path_flush_pending(struct mesh_path *mpath)
+ {
++ struct ieee80211_sub_if_data *sdata = mpath->sdata;
++ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
++ struct mesh_preq_queue *preq, *tmp;
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL)
+ mesh_path_discard_frame(mpath->sdata, skb);
++
++ spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
++ list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) {
++ if (ether_addr_equal(mpath->dst, preq->dst)) {
++ list_del(&preq->list);
++ kfree(preq);
++ --ifmsh->preq_queue_len;
++ }
++ }
++ spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
+ }
+
+ /**
+--
+2.43.0
+
--- /dev/null
+From 575ba8bba63be53ca45a426d7e96cfd11b598ba3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 May 2024 08:55:55 +0530
+Subject: wifi: mac80211: pass proper link id for channel switch started
+ notification
+
+From: Aditya Kumar Singh <quic_adisi@quicinc.com>
+
+[ Upstream commit 8ecc4d7a7cd3e9704b63b8e4f6cd8b6b7314210f ]
+
+The original change[1] posted had the proper changes. However, at the same
+time, there were chandef puncturing changes which had a conflict with this.
+While applying, two errors crept in -
+ a) Whitespace error.
+ b) Link ID being passed to channel switch started notifier function is
+ 0. However proper link ID is present in the function.
+
+Fix these now.
+
+[1] https://lore.kernel.org/all/20240130140918.1172387-5-quic_adisi@quicinc.com/
+
+Fixes: 1a96bb4e8a79 ("wifi: mac80211: start and finalize channel switch on link basis")
+Signed-off-by: Aditya Kumar Singh <quic_adisi@quicinc.com>
+Link: https://msgid.link/20240509032555.263933-1-quic_adisi@quicinc.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/cfg.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
+index 07abaf7820c56..51dc2d9dd6b84 100644
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -4012,7 +4012,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ goto out;
+ }
+
+- link_data->csa_chanreq = chanreq;
++ link_data->csa_chanreq = chanreq;
+ link_conf->csa_active = true;
+
+ if (params->block_tx &&
+@@ -4023,7 +4023,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ }
+
+ cfg80211_ch_switch_started_notify(sdata->dev,
+- &link_data->csa_chanreq.oper, 0,
++ &link_data->csa_chanreq.oper, link_id,
+ params->count, params->block_tx);
+
+ if (changed) {
+--
+2.43.0
+
--- /dev/null
+From 3a2b16e9d7be8975d7c8bfb675a77056f76cecdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Mar 2024 13:07:06 +0100
+Subject: x86/cpu: Get rid of an unnecessary local variable in
+ get_cpu_address_sizes()
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+[ Upstream commit 95bfb35269b2e85cff0dd2c957b2d42ebf95ae5f ]
+
+Drop 'vp_bits_from_cpuid' as it is not really needed.
+
+No functional changes.
+
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/20240316120706.4352-1-bp@alien8.de
+Stable-dep-of: 2a38e4ca3022 ("x86/cpu: Provide default cache line size if not enumerated")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/common.c | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index ae987a26f26e4..d636991536a5f 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1053,18 +1053,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
+ {
+ u32 eax, ebx, ecx, edx;
+- bool vp_bits_from_cpuid = true;
+
+ if (!cpu_has(c, X86_FEATURE_CPUID) ||
+- (c->extended_cpuid_level < 0x80000008))
+- vp_bits_from_cpuid = false;
+-
+- if (vp_bits_from_cpuid) {
+- cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+-
+- c->x86_virt_bits = (eax >> 8) & 0xff;
+- c->x86_phys_bits = eax & 0xff;
+- } else {
++ (c->extended_cpuid_level < 0x80000008)) {
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ c->x86_clflush_size = 64;
+ c->x86_phys_bits = 36;
+@@ -1078,7 +1069,13 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
+ cpu_has(c, X86_FEATURE_PSE36))
+ c->x86_phys_bits = 36;
+ }
++ } else {
++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
++
++ c->x86_virt_bits = (eax >> 8) & 0xff;
++ c->x86_phys_bits = eax & 0xff;
+ }
++
+ c->x86_cache_bits = c->x86_phys_bits;
+ c->x86_cache_alignment = c->x86_clflush_size;
+ }
+--
+2.43.0
+
--- /dev/null
+From 7692d0278be8271d42a87fde9aca4d5892965ae7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 13:05:34 -0700
+Subject: x86/cpu: Provide default cache line size if not enumerated
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+[ Upstream commit 2a38e4ca302280fdcce370ba2bee79bac16c4587 ]
+
+tl;dr: CPUs with CPUID.80000008H but without CPUID.01H:EDX[CLFSH]
+will end up reporting cache_line_size()==0 and bad things happen.
+Fill in a default on those to avoid the problem.
+
+Long Story:
+
+The kernel dies a horrible death if c->x86_cache_alignment (aka.
+cache_line_size() is 0. Normally, this value is populated from
+c->x86_clflush_size.
+
+Right now the code is set up to get c->x86_clflush_size from two
+places. First, modern CPUs get it from CPUID. Old CPUs that don't
+have leaf 0x80000008 (or CPUID at all) just get some sane defaults
+from the kernel in get_cpu_address_sizes().
+
+The vast majority of CPUs that have leaf 0x80000008 also get
+->x86_clflush_size from CPUID. But there are oddballs.
+
+Intel Quark CPUs[1] and others[2] have leaf 0x80000008 but don't set
+CPUID.01H:EDX[CLFSH], so they skip over filling in ->x86_clflush_size:
+
+ cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+ if (cap0 & (1<<19))
+ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+
+So they: land in get_cpu_address_sizes() and see that CPUID has level
+0x80000008 and jump into the side of the if() that does not fill in
+c->x86_clflush_size. That assigns a 0 to c->x86_cache_alignment, and
+hilarity ensues in code like:
+
+ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+ GFP_KERNEL);
+
+To fix this, always provide a sane value for ->x86_clflush_size.
+
+Big thanks to Andy Shevchenko for finding and reporting this and also
+providing a first pass at a fix. But his fix was only partial and only
+worked on the Quark CPUs. It would not, for instance, have worked on
+the QEMU config.
+
+1. https://raw.githubusercontent.com/InstLatx64/InstLatx64/master/GenuineIntel/GenuineIntel0000590_Clanton_03_CPUID.txt
+2. You can also get this behavior if you use "-cpu 486,+clzero"
+ in QEMU.
+
+[ dhansen: remove 'vp_bits_from_cpuid' reference in changelog
+ because bpetkov brutally murdered it recently. ]
+
+Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
+Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Tested-by: Jörn Heusipp <osmanx@heusipp.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240516173928.3960193-1-andriy.shevchenko@linux.intel.com/
+Link: https://lore.kernel.org/lkml/5e31cad3-ad4d-493e-ab07-724cfbfaba44@heusipp.de/
+Link: https://lore.kernel.org/all/20240517200534.8EC5F33E%40davehans-spike.ostc.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/common.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index d636991536a5f..1982007828276 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1074,6 +1074,10 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
++
++ /* Provide a sane default if not enumerated: */
++ if (!c->x86_clflush_size)
++ c->x86_clflush_size = 32;
+ }
+
+ c->x86_cache_bits = c->x86_phys_bits;
+--
+2.43.0
+