]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author Sasha Levin <sashal@kernel.org>
Mon, 15 Apr 2024 08:56:16 +0000 (04:56 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 15 Apr 2024 08:56:16 +0000 (04:56 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
65 files changed:
queue-6.6/af_unix-clear-stale-u-oob_skb.patch [new file with mode: 0644]
queue-6.6/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch [new file with mode: 0644]
queue-6.6/af_unix-fix-garbage-collector-racing-against-connect.patch [new file with mode: 0644]
queue-6.6/arm-omap2-fix-bogus-mmc-gpio-labels-on-nokia-n8x0.patch [new file with mode: 0644]
queue-6.6/arm-omap2-fix-n810-mmc-gpiod-table.patch [new file with mode: 0644]
queue-6.6/arm-omap2-fix-usb-regression-on-nokia-n8x0.patch [new file with mode: 0644]
queue-6.6/arm64-dts-imx8-ss-conn-fix-usdhc-wrong-lpcg-clock-or.patch [new file with mode: 0644]
queue-6.6/block-fix-q-blkg_list-corruption-during-disk-rebind.patch [new file with mode: 0644]
queue-6.6/bluetooth-hci_sync-fix-using-the-same-interval-and-w.patch [new file with mode: 0644]
queue-6.6/bluetooth-hci_sync-use-qos-to-determine-which-phy-to.patch [new file with mode: 0644]
queue-6.6/bluetooth-iso-align-broadcast-sync_timeout-with-conn.patch [new file with mode: 0644]
queue-6.6/bluetooth-iso-don-t-reject-bt_iso_qos-if-parameters-.patch [new file with mode: 0644]
queue-6.6/bluetooth-l2cap-don-t-double-set-the-hci_conn_mgmt_c.patch [new file with mode: 0644]
queue-6.6/bluetooth-sco-fix-not-validating-setsockopt-user-inp.patch [new file with mode: 0644]
queue-6.6/bnxt_en-fix-error-recovery-for-roce-ulp-client.patch [new file with mode: 0644]
queue-6.6/bnxt_en-fix-possible-memory-leak-in-bnxt_rdma_aux_de.patch [new file with mode: 0644]
queue-6.6/bnxt_en-reset-ptp-tx_avail-after-possible-firmware-r.patch [new file with mode: 0644]
queue-6.6/cxl-core-fix-initialization-of-mbox_cmd.size_out-in-.patch [new file with mode: 0644]
queue-6.6/cxl-core-regs-fix-usage-of-map-reg_type-in-cxl_decod.patch [new file with mode: 0644]
queue-6.6/cxl-mem-fix-for-the-index-of-clear-event-record-hand.patch [new file with mode: 0644]
queue-6.6/drm-msm-dpu-don-t-allow-overriding-data-from-catalog.patch [new file with mode: 0644]
queue-6.6/firmware-arm_scmi-make-raw-debugfs-entries-non-seeka.patch [new file with mode: 0644]
queue-6.6/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch [new file with mode: 0644]
queue-6.6/iommu-vt-d-allocate-local-memory-for-page-request-qu.patch [new file with mode: 0644]
queue-6.6/iommu-vt-d-fix-wrong-use-of-pasid-config.patch [new file with mode: 0644]
queue-6.6/ipv4-route-avoid-unused-but-set-variable-warning.patch [new file with mode: 0644]
queue-6.6/ipv6-fib-hide-unused-pn-variable.patch [new file with mode: 0644]
queue-6.6/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch [new file with mode: 0644]
queue-6.6/mmc-omap-fix-broken-slot-switch-lookup.patch [new file with mode: 0644]
queue-6.6/mmc-omap-fix-deferred-probe.patch [new file with mode: 0644]
queue-6.6/mmc-omap-restore-original-power-up-down-steps.patch [new file with mode: 0644]
queue-6.6/net-dsa-mt7530-trap-link-local-frames-regardless-of-.patch [new file with mode: 0644]
queue-6.6/net-ena-fix-incorrect-descriptor-free-behavior.patch [new file with mode: 0644]
queue-6.6/net-ena-fix-potential-sign-extension-issue.patch [new file with mode: 0644]
queue-6.6/net-ena-move-xdp-code-to-its-new-files.patch [new file with mode: 0644]
queue-6.6/net-ena-pass-ena_adapter-instead-of-net_device-to-en.patch [new file with mode: 0644]
queue-6.6/net-ena-set-tx_info-xdpf-value-to-null.patch [new file with mode: 0644]
queue-6.6/net-ena-use-tx_ring-instead-of-xdp_ring-for-xdp-chan.patch [new file with mode: 0644]
queue-6.6/net-ena-wrong-missing-io-completions-check-order.patch [new file with mode: 0644]
queue-6.6/net-ks8851-handle-softirqs-at-the-end-of-irq-thread-.patch [new file with mode: 0644]
queue-6.6/net-ks8851-inline-ks8851_rx_skb.patch [new file with mode: 0644]
queue-6.6/net-mlx5-correctly-compare-pkt-reformat-ids.patch [new file with mode: 0644]
queue-6.6/net-mlx5-offset-comp-irq-index-in-name-by-one.patch [new file with mode: 0644]
queue-6.6/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch [new file with mode: 0644]
queue-6.6/net-mlx5-register-devlink-first-under-devlink-lock.patch [new file with mode: 0644]
queue-6.6/net-mlx5-sf-stop-waiting-for-fw-as-teardown-was-call.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-do-not-produce-metadata-freelist-entries-i.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-fix-mlx5e_priv_init-cleanup-flow.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-htb-fix-inconsistencies-with-qos-sqs-numbe.patch [new file with mode: 0644]
queue-6.6/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch [new file with mode: 0644]
queue-6.6/net-sparx5-fix-wrong-config-being-used-when-reconfig.patch [new file with mode: 0644]
queue-6.6/netfilter-complete-validation-of-user-input.patch [new file with mode: 0644]
queue-6.6/nouveau-fix-function-cast-warning.patch [new file with mode: 0644]
queue-6.6/octeontx2-af-fix-nix-sq-mode-and-bp-config.patch [new file with mode: 0644]
queue-6.6/octeontx2-pf-fix-transmit-scheduler-resource-leak.patch [new file with mode: 0644]
queue-6.6/revert-drm-qxl-simplify-qxl_fence_wait.patch [new file with mode: 0644]
queue-6.6/revert-s390-ism-fix-receive-message-buffer-allocatio.patch [new file with mode: 0644]
queue-6.6/s390-ism-fix-receive-message-buffer-allocation.patch [new file with mode: 0644]
queue-6.6/scsi-hisi_sas-modify-the-deadline-for-ata_wait_after.patch [new file with mode: 0644]
queue-6.6/scsi-qla2xxx-fix-off-by-one-in-qla_edif_app_getstats.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/tracing-fix-ftrace_record_recursion_size-kconfig-ent.patch [new file with mode: 0644]
queue-6.6/tracing-hide-unused-ftrace_event_id_fops.patch [new file with mode: 0644]
queue-6.6/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch [new file with mode: 0644]
queue-6.6/xsk-validate-user-input-for-xdp_-umem-completion-_fi.patch [new file with mode: 0644]

diff --git a/queue-6.6/af_unix-clear-stale-u-oob_skb.patch b/queue-6.6/af_unix-clear-stale-u-oob_skb.patch
new file mode 100644 (file)
index 0000000..5ce6a20
--- /dev/null
@@ -0,0 +1,104 @@
+From 50395390af6511b65ef09052d794066cb1ac444b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 15:10:57 -0700
+Subject: af_unix: Clear stale u->oob_skb.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit b46f4eaa4f0ec38909fb0072eea3aeddb32f954e ]
+
+syzkaller started to report deadlock of unix_gc_lock after commit
+4090fa373f0e ("af_unix: Replace garbage collection algorithm."), but
+it just uncovers the bug that has been there since commit 314001f0bf92
+("af_unix: Add OOB support").
+
+The repro basically does the following.
+
+  from socket import *
+  from array import array
+
+  c1, c2 = socketpair(AF_UNIX, SOCK_STREAM)
+  c1.sendmsg([b'a'], [(SOL_SOCKET, SCM_RIGHTS, array("i", [c2.fileno()]))], MSG_OOB)
+  c2.recv(1)  # blocked as no normal data in recv queue
+
+  c2.close()  # done async and unblock recv()
+  c1.close()  # done async and trigger GC
+
+A socket sends its file descriptor to itself as OOB data and tries to
+receive normal data, but finally recv() fails due to async close().
+
+The problem here is wrong handling of OOB skb in manage_oob().  When
+recvmsg() is called without MSG_OOB, manage_oob() is called to check
+if the peeked skb is OOB skb.  In such a case, manage_oob() pops it
+out of the receive queue but does not clear unix_sk(sk)->oob_skb.
+This is wrong in terms of uAPI.
+
+Let's say we send "hello" with MSG_OOB, and "world" without MSG_OOB.
+The 'o' is handled as OOB data.  When recv() is called twice without
+MSG_OOB, the OOB data should be lost.
+
+  >>> from socket import *
+  >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM, 0)
+  >>> c1.send(b'hello', MSG_OOB)  # 'o' is OOB data
+  5
+  >>> c1.send(b'world')
+  5
+  >>> c2.recv(5)  # OOB data is not received
+  b'hell'
+  >>> c2.recv(5)  # OOB data is skipped
+  b'world'
+  >>> c2.recv(5, MSG_OOB)  # This should return an error
+  b'o'
+
+In the same situation, TCP actually returns -EINVAL for the last
+recv().
+
+Also, if we do not clear unix_sk(sk)->oob_skb, unix_poll() always sets
+EPOLLPRI even though the data has been passed over by a previous recv().
+
+To avoid these issues, we must clear unix_sk(sk)->oob_skb when dequeuing
+it from recv queue.
+
+The reason why the old GC did not trigger the deadlock is because the
+old GC relied on the receive queue to detect the loop.
+
+When it is triggered, the socket with OOB data is marked as GC candidate
+because file refcount == inflight count (1).  However, after traversing
+all inflight sockets, the socket still has a positive inflight count (1),
+thus the socket is excluded from candidates.  Then, the old GC loses the
+chance to garbage-collect the socket.
+
+With the old GC, the repro continues to create true garbage that will
+never be freed nor detected by kmemleak as it's linked to the global
+inflight list.  That's why we couldn't even notice the issue.
+
+Fixes: 314001f0bf92 ("af_unix: Add OOB support")
+Reported-by: syzbot+7f7f201cc2668a8fd169@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=7f7f201cc2668a8fd169
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240405221057.2406-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 510b1d6758db7..ac3d4b540c100 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2589,7 +2589,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+                               }
+                       } else if (!(flags & MSG_PEEK)) {
+                               skb_unlink(skb, &sk->sk_receive_queue);
+-                              consume_skb(skb);
++                              WRITE_ONCE(u->oob_skb, NULL);
++                              if (!WARN_ON_ONCE(skb_unref(skb)))
++                                      kfree_skb(skb);
+                               skb = skb_peek(&sk->sk_receive_queue);
+                       }
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.6/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch b/queue-6.6/af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch
new file mode 100644 (file)
index 0000000..c81c0cd
--- /dev/null
@@ -0,0 +1,147 @@
+From 695a051e1b37cea28ccda99978ca41a17e3d1738 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Jan 2024 09:08:53 -0800
+Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 ]
+
+When touching unix_sk(sk)->inflight, we are always under
+spin_lock(&unix_gc_lock).
+
+Let's convert unix_sk(sk)->inflight to the normal unsigned long.
+
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/af_unix.h |  2 +-
+ net/unix/af_unix.c    |  4 ++--
+ net/unix/garbage.c    | 17 ++++++++---------
+ net/unix/scm.c        |  8 +++++---
+ 4 files changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/include/net/af_unix.h b/include/net/af_unix.h
+index afd40dce40f3d..d1b07ddbe677e 100644
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -55,7 +55,7 @@ struct unix_sock {
+       struct mutex            iolock, bindlock;
+       struct sock             *peer;
+       struct list_head        link;
+-      atomic_long_t           inflight;
++      unsigned long           inflight;
+       spinlock_t              lock;
+       unsigned long           gc_flags;
+ #define UNIX_GC_CANDIDATE     0
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index ac3d4b540c100..918724844231e 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -992,11 +992,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
+       sk->sk_write_space      = unix_write_space;
+       sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
+       sk->sk_destruct         = unix_sock_destructor;
+-      u         = unix_sk(sk);
++      u = unix_sk(sk);
++      u->inflight = 0;
+       u->path.dentry = NULL;
+       u->path.mnt = NULL;
+       spin_lock_init(&u->lock);
+-      atomic_long_set(&u->inflight, 0);
+       INIT_LIST_HEAD(&u->link);
+       mutex_init(&u->iolock); /* single task reading lock */
+       mutex_init(&u->bindlock); /* single task binding lock */
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index 027c86e804f8a..aea222796dfdc 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
+ static void dec_inflight(struct unix_sock *usk)
+ {
+-      atomic_long_dec(&usk->inflight);
++      usk->inflight--;
+ }
+ static void inc_inflight(struct unix_sock *usk)
+ {
+-      atomic_long_inc(&usk->inflight);
++      usk->inflight++;
+ }
+ static void inc_inflight_move_tail(struct unix_sock *u)
+ {
+-      atomic_long_inc(&u->inflight);
++      u->inflight++;
++
+       /* If this still might be part of a cycle, move it to the end
+        * of the list, so that it's checked even if it was already
+        * passed over
+@@ -237,14 +238,12 @@ void unix_gc(void)
+        */
+       list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+               long total_refs;
+-              long inflight_refs;
+               total_refs = file_count(u->sk.sk_socket->file);
+-              inflight_refs = atomic_long_read(&u->inflight);
+-              BUG_ON(inflight_refs < 1);
+-              BUG_ON(total_refs < inflight_refs);
+-              if (total_refs == inflight_refs) {
++              BUG_ON(!u->inflight);
++              BUG_ON(total_refs < u->inflight);
++              if (total_refs == u->inflight) {
+                       list_move_tail(&u->link, &gc_candidates);
+                       __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+                       __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+@@ -271,7 +270,7 @@ void unix_gc(void)
+               /* Move cursor to after the current position. */
+               list_move(&cursor, &u->link);
+-              if (atomic_long_read(&u->inflight) > 0) {
++              if (u->inflight) {
+                       list_move_tail(&u->link, &not_cycle_list);
+                       __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+                       scan_children(&u->sk, inc_inflight_move_tail, NULL);
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index 822ce0d0d7915..e92f2fad64105 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -53,12 +53,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
+-              if (atomic_long_inc_return(&u->inflight) == 1) {
++              if (!u->inflight) {
+                       BUG_ON(!list_empty(&u->link));
+                       list_add_tail(&u->link, &gc_inflight_list);
+               } else {
+                       BUG_ON(list_empty(&u->link));
+               }
++              u->inflight++;
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+       }
+@@ -75,10 +76,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
+       if (s) {
+               struct unix_sock *u = unix_sk(s);
+-              BUG_ON(!atomic_long_read(&u->inflight));
++              BUG_ON(!u->inflight);
+               BUG_ON(list_empty(&u->link));
+-              if (atomic_long_dec_and_test(&u->inflight))
++              u->inflight--;
++              if (!u->inflight)
+                       list_del_init(&u->link);
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+-- 
+2.43.0
+
diff --git a/queue-6.6/af_unix-fix-garbage-collector-racing-against-connect.patch b/queue-6.6/af_unix-fix-garbage-collector-racing-against-connect.patch
new file mode 100644 (file)
index 0000000..198d99f
--- /dev/null
@@ -0,0 +1,122 @@
+From 436023a9b4776400882b0d48ebc06043a3105601 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:09:39 +0200
+Subject: af_unix: Fix garbage collector racing against connect()
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ]
+
+Garbage collector does not take into account the risk of embryo getting
+enqueued during the garbage collection. If such embryo has a peer that
+carries SCM_RIGHTS, two consecutive passes of scan_children() may see a
+different set of children, leading to an incorrectly elevated inflight
+count and then a dangling pointer within the gc_inflight_list.
+
+sockets are AF_UNIX/SOCK_STREAM
+S is an unconnected socket
+L is a listening in-flight socket bound to addr, not in fdtable
+V's fd will be passed via sendmsg(), gets inflight count bumped
+
+connect(S, addr)       sendmsg(S, [V]); close(V)       __unix_gc()
+----------------       -------------------------       -----------
+
+NS = unix_create1()
+skb1 = sock_wmalloc(NS)
+L = unix_find_other(addr)
+unix_state_lock(L)
+unix_peer(S) = NS
+                       // V count=1 inflight=0
+
+                       NS = unix_peer(S)
+                       skb2 = sock_alloc()
+                       skb_queue_tail(NS, skb2[V])
+
+                       // V became in-flight
+                       // V count=2 inflight=1
+
+                       close(V)
+
+                       // V count=1 inflight=1
+                       // GC candidate condition met
+
+                                               for u in gc_inflight_list:
+                                                 if (total_refs == inflight_refs)
+                                                   add u to gc_candidates
+
+                                               // gc_candidates={L, V}
+
+                                               for u in gc_candidates:
+                                                 scan_children(u, dec_inflight)
+
+                                               // embryo (skb1) was not
+                                               // reachable from L yet, so V's
+                                               // inflight remains unchanged
+__skb_queue_tail(L, skb1)
+unix_state_unlock(L)
+                                               for u in gc_candidates:
+                                                 if (u.inflight)
+                                                   scan_children(u, inc_inflight_move_tail)
+
+                                               // V count=1 inflight=2 (!)
+
+If there is a GC-candidate listening socket, lock/unlock its state. This
+makes GC wait until the end of any ongoing connect() to that socket. After
+flipping the lock, a possibly SCM-laden embryo is already enqueued. And if
+there is another embryo coming, it can not possibly carry SCM_RIGHTS. At
+this point, unix_inflight() can not happen because unix_gc_lock is already
+taken. Inflight graph remains unaffected.
+
+Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/garbage.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/net/unix/garbage.c b/net/unix/garbage.c
+index aea222796dfdc..8734c0c1fc197 100644
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -235,11 +235,22 @@ void unix_gc(void)
+        * receive queues.  Other, non candidate sockets _can_ be
+        * added to queue, so we must make sure only to touch
+        * candidates.
++       *
++       * Embryos, though never candidates themselves, affect which
++       * candidates are reachable by the garbage collector.  Before
++       * being added to a listener's queue, an embryo may already
++       * receive data carrying SCM_RIGHTS, potentially making the
++       * passed socket a candidate that is not yet reachable by the
++       * collector.  It becomes reachable once the embryo is
++       * enqueued.  Therefore, we must ensure that no SCM-laden
++       * embryo appears in a (candidate) listener's queue between
++       * consecutive scan_children() calls.
+        */
+       list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
++              struct sock *sk = &u->sk;
+               long total_refs;
+-              total_refs = file_count(u->sk.sk_socket->file);
++              total_refs = file_count(sk->sk_socket->file);
+               BUG_ON(!u->inflight);
+               BUG_ON(total_refs < u->inflight);
+@@ -247,6 +258,11 @@ void unix_gc(void)
+                       list_move_tail(&u->link, &gc_candidates);
+                       __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+                       __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
++
++                      if (sk->sk_state == TCP_LISTEN) {
++                              unix_state_lock(sk);
++                              unix_state_unlock(sk);
++                      }
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/arm-omap2-fix-bogus-mmc-gpio-labels-on-nokia-n8x0.patch b/queue-6.6/arm-omap2-fix-bogus-mmc-gpio-labels-on-nokia-n8x0.patch
new file mode 100644 (file)
index 0000000..2ec74d1
--- /dev/null
@@ -0,0 +1,53 @@
+From 21b19d425d9e7cab6f2d407c2cd3df503c889203 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:14:35 +0200
+Subject: ARM: OMAP2+: fix bogus MMC GPIO labels on Nokia N8x0
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit 95f37eb52e18879a1b16e51b972d992b39e50a81 ]
+
+The GPIO bank width is 32 on OMAP2, so all labels are incorrect.
+
+Fixes: e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Message-ID: <20240223181439.1099750-2-aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mach-omap2/board-n8x0.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
+index 8e3b5068d4ab0..5ac27ed650e0e 100644
+--- a/arch/arm/mach-omap2/board-n8x0.c
++++ b/arch/arm/mach-omap2/board-n8x0.c
+@@ -144,8 +144,7 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+       .dev_id = "mmci-omap.0",
+       .table = {
+               /* Slot switch, GPIO 96 */
+-              GPIO_LOOKUP("gpio-80-111", 16,
+-                          "switch", GPIO_ACTIVE_HIGH),
++              GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
+               { }
+       },
+ };
+@@ -154,11 +153,9 @@ static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
+       .dev_id = "mmci-omap.0",
+       .table = {
+               /* Slot index 1, VSD power, GPIO 23 */
+-              GPIO_LOOKUP_IDX("gpio-16-31", 7,
+-                              "vsd", 1, GPIO_ACTIVE_HIGH),
++              GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
+               /* Slot index 1, VIO power, GPIO 9 */
+-              GPIO_LOOKUP_IDX("gpio-0-15", 9,
+-                              "vio", 1, GPIO_ACTIVE_HIGH),
++              GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
+               { }
+       },
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/arm-omap2-fix-n810-mmc-gpiod-table.patch b/queue-6.6/arm-omap2-fix-n810-mmc-gpiod-table.patch
new file mode 100644 (file)
index 0000000..4efe8d0
--- /dev/null
@@ -0,0 +1,69 @@
+From 29cbf8985e79445f9f1a45e7c3733c89ab1a79ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:14:36 +0200
+Subject: ARM: OMAP2+: fix N810 MMC gpiod table
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit 480d44d0820dd5ae043dc97c0b46dabbe53cb1cf ]
+
+Trying to append a second table for the same dev_id doesn't seem to work.
+The second table is just silently ignored. As a result eMMC GPIOs are not
+present.
+
+Fix by using separate tables for N800 and N810.
+
+Fixes: e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Message-ID: <20240223181439.1099750-3-aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mach-omap2/board-n8x0.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
+index 5ac27ed650e0e..c5b2e1e79698d 100644
+--- a/arch/arm/mach-omap2/board-n8x0.c
++++ b/arch/arm/mach-omap2/board-n8x0.c
+@@ -140,7 +140,7 @@ static int slot1_cover_open;
+ static int slot2_cover_open;
+ static struct device *mmc_device;
+-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
++static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
+       .dev_id = "mmci-omap.0",
+       .table = {
+               /* Slot switch, GPIO 96 */
+@@ -152,6 +152,8 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+ static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
+       .dev_id = "mmci-omap.0",
+       .table = {
++              /* Slot switch, GPIO 96 */
++              GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
+               /* Slot index 1, VSD power, GPIO 23 */
+               GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
+               /* Slot index 1, VIO power, GPIO 9 */
+@@ -412,8 +414,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
+ static void __init n8x0_mmc_init(void)
+ {
+-      gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
+-
+       if (board_is_n810()) {
+               mmc1_data.slots[0].name = "external";
+@@ -426,6 +426,8 @@ static void __init n8x0_mmc_init(void)
+               mmc1_data.slots[1].name = "internal";
+               mmc1_data.slots[1].ban_openended = 1;
+               gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
++      } else {
++              gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
+       }
+       mmc1_data.nr_slots = 2;
+-- 
+2.43.0
+
diff --git a/queue-6.6/arm-omap2-fix-usb-regression-on-nokia-n8x0.patch b/queue-6.6/arm-omap2-fix-usb-regression-on-nokia-n8x0.patch
new file mode 100644 (file)
index 0000000..5245e8d
--- /dev/null
@@ -0,0 +1,41 @@
+From 00e3d3492b59f6d266f12efa1c23d48eefb3d721 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:16:56 +0200
+Subject: ARM: OMAP2+: fix USB regression on Nokia N8x0
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit 4421405e3634a3189b541cf1e34598e44260720d ]
+
+GPIO chip labels are wrong for OMAP2, so the USB does not work. Fix.
+
+Fixes: 8e0285ab95a9 ("ARM/musb: omap2: Remove global GPIO numbers from TUSB6010")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Message-ID: <20240223181656.1099845-1-aaro.koskinen@iki.fi>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mach-omap2/board-n8x0.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
+index c5b2e1e79698d..b45a3879eb344 100644
+--- a/arch/arm/mach-omap2/board-n8x0.c
++++ b/arch/arm/mach-omap2/board-n8x0.c
+@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
+ static struct gpiod_lookup_table tusb_gpio_table = {
+       .dev_id = "musb-tusb",
+       .table = {
+-              GPIO_LOOKUP("gpio-0-15", 0, "enable",
+-                          GPIO_ACTIVE_HIGH),
+-              GPIO_LOOKUP("gpio-48-63", 10, "int",
+-                          GPIO_ACTIVE_HIGH),
++              GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
++              GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
+               { }
+       },
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/arm64-dts-imx8-ss-conn-fix-usdhc-wrong-lpcg-clock-or.patch b/queue-6.6/arm64-dts-imx8-ss-conn-fix-usdhc-wrong-lpcg-clock-or.patch
new file mode 100644 (file)
index 0000000..f46b272
--- /dev/null
@@ -0,0 +1,95 @@
+From 057e2150f1422ad41aad1b1b9363eb1427514f69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Mar 2024 12:47:05 -0400
+Subject: arm64: dts: imx8-ss-conn: fix usdhc wrong lpcg clock order
+
+From: Frank Li <Frank.Li@nxp.com>
+
+[ Upstream commit c6ddd6e7b166532a0816825442ff60f70aed9647 ]
+
+The actual clock shows the wrong frequency:
+
+   echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control
+   cat /sys/kernel/debug/mmc0/ios
+
+   clock:          200000000 Hz
+   actual clock:   166000000 Hz
+                   ^^^^^^^^^
+   .....
+
+According to
+
+sdhc0_lpcg: clock-controller@5b200000 {
+                compatible = "fsl,imx8qxp-lpcg";
+                reg = <0x5b200000 0x10000>;
+                #clock-cells = <1>;
+                clocks = <&clk IMX_SC_R_SDHC_0 IMX_SC_PM_CLK_PER>,
+                         <&conn_ipg_clk>, <&conn_axi_clk>;
+                clock-indices = <IMX_LPCG_CLK_0>, <IMX_LPCG_CLK_4>,
+                                <IMX_LPCG_CLK_5>;
+                clock-output-names = "sdhc0_lpcg_per_clk",
+                                     "sdhc0_lpcg_ipg_clk",
+                                     "sdhc0_lpcg_ahb_clk";
+                power-domains = <&pd IMX_SC_R_SDHC_0>;
+        }
+
+"per_clk" should be IMX_LPCG_CLK_0 instead of IMX_LPCG_CLK_5.
+
+After correcting the clock order:
+
+   echo on >/sys/devices/platform/bus\@5b000000/5b010000.mmc/power/control
+   cat /sys/kernel/debug/mmc0/ios
+
+   clock:          200000000 Hz
+   actual clock:   198000000 Hz
+                   ^^^^^^^^
+   ...
+
+Fixes: 16c4ea7501b1 ("arm64: dts: imx8: switch to new lpcg clock binding")
+Signed-off-by: Frank Li <Frank.Li@nxp.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+index fc1a5d34382b7..5f151ae78586e 100644
+--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+@@ -67,8 +67,8 @@ usdhc1: mmc@5b010000 {
+               interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
+               reg = <0x5b010000 0x10000>;
+               clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
+-                       <&sdhc0_lpcg IMX_LPCG_CLK_0>,
+-                       <&sdhc0_lpcg IMX_LPCG_CLK_5>;
++                       <&sdhc0_lpcg IMX_LPCG_CLK_5>,
++                       <&sdhc0_lpcg IMX_LPCG_CLK_0>;
+               clock-names = "ipg", "ahb", "per";
+               power-domains = <&pd IMX_SC_R_SDHC_0>;
+               status = "disabled";
+@@ -78,8 +78,8 @@ usdhc2: mmc@5b020000 {
+               interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+               reg = <0x5b020000 0x10000>;
+               clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
+-                       <&sdhc1_lpcg IMX_LPCG_CLK_0>,
+-                       <&sdhc1_lpcg IMX_LPCG_CLK_5>;
++                       <&sdhc1_lpcg IMX_LPCG_CLK_5>,
++                       <&sdhc1_lpcg IMX_LPCG_CLK_0>;
+               clock-names = "ipg", "ahb", "per";
+               power-domains = <&pd IMX_SC_R_SDHC_1>;
+               fsl,tuning-start-tap = <20>;
+@@ -91,8 +91,8 @@ usdhc3: mmc@5b030000 {
+               interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
+               reg = <0x5b030000 0x10000>;
+               clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
+-                       <&sdhc2_lpcg IMX_LPCG_CLK_0>,
+-                       <&sdhc2_lpcg IMX_LPCG_CLK_5>;
++                       <&sdhc2_lpcg IMX_LPCG_CLK_5>,
++                       <&sdhc2_lpcg IMX_LPCG_CLK_0>;
+               clock-names = "ipg", "ahb", "per";
+               power-domains = <&pd IMX_SC_R_SDHC_2>;
+               status = "disabled";
+-- 
+2.43.0
+
diff --git a/queue-6.6/block-fix-q-blkg_list-corruption-during-disk-rebind.patch b/queue-6.6/block-fix-q-blkg_list-corruption-during-disk-rebind.patch
new file mode 100644 (file)
index 0000000..2e50593
--- /dev/null
@@ -0,0 +1,100 @@
+From 047e2c364589b8069fb4a2a370957bc3277d0e15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Apr 2024 20:59:10 +0800
+Subject: block: fix q->blkg_list corruption during disk rebind
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 8b8ace080319a866f5dfe9da8e665ae51d971c54 ]
+
+Multiple gendisk instances can be allocated/added for a single request queue
+in case of disk rebind. blkg may still stay in q->blkg_list when calling
+blkcg_init_disk() for rebind, then q->blkg_list becomes corrupted.
+
+Fix the list corruption issue by:
+
+- add blkg_init_queue() to initialize q->blkg_list & q->blkcg_mutex only
+- move calling blkg_init_queue() into blk_alloc_queue()
+
+The list corruption should have started with commit f1c006f1c685 ("blk-cgroup:
+synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
+which delays removing blkg from q->blkg_list into blkg_free_workfn().
+
+Fixes: f1c006f1c685 ("blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
+Fixes: 1059699f87eb ("block: move blkcg initialization/destroy into disk allocation/release handler")
+Cc: Yu Kuai <yukuai3@huawei.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Link: https://lore.kernel.org/r/20240407125910.4053377-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-cgroup.c | 9 ++++++---
+ block/blk-cgroup.h | 2 ++
+ block/blk-core.c   | 2 ++
+ 3 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
+index 4b48c2c440981..4c49a70b46bd1 100644
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
+       return 0;
+ }
++void blkg_init_queue(struct request_queue *q)
++{
++      INIT_LIST_HEAD(&q->blkg_list);
++      mutex_init(&q->blkcg_mutex);
++}
++
+ int blkcg_init_disk(struct gendisk *disk)
+ {
+       struct request_queue *q = disk->queue;
+@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
+       bool preloaded;
+       int ret;
+-      INIT_LIST_HEAD(&q->blkg_list);
+-      mutex_init(&q->blkcg_mutex);
+-
+       new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
+       if (!new_blkg)
+               return -ENOMEM;
+diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
+index b927a4a0ad030..5b0bdc268ade9 100644
+--- a/block/blk-cgroup.h
++++ b/block/blk-cgroup.h
+@@ -188,6 +188,7 @@ struct blkcg_policy {
+ extern struct blkcg blkcg_root;
+ extern bool blkcg_debug_stats;
++void blkg_init_queue(struct request_queue *q);
+ int blkcg_init_disk(struct gendisk *disk);
+ void blkcg_exit_disk(struct gendisk *disk);
+@@ -481,6 +482,7 @@ struct blkcg {
+ };
+ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
++static inline void blkg_init_queue(struct request_queue *q) { }
+ static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
+ static inline void blkcg_exit_disk(struct gendisk *disk) { }
+ static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 2eca76ccf4ee0..a3726d8cf8738 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -430,6 +430,8 @@ struct request_queue *blk_alloc_queue(int node_id)
+       init_waitqueue_head(&q->mq_freeze_wq);
+       mutex_init(&q->mq_freeze_lock);
++      blkg_init_queue(q);
++
+       /*
+        * Init percpu_ref in atomic mode so that it's faster to shutdown.
+        * See blk_register_queue() for details.
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-hci_sync-fix-using-the-same-interval-and-w.patch b/queue-6.6/bluetooth-hci_sync-fix-using-the-same-interval-and-w.patch
new file mode 100644 (file)
index 0000000..e425730
--- /dev/null
@@ -0,0 +1,48 @@
+From 2b935e36febfb97f1a8c3dec1d880186ecf4ba14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Mar 2024 15:58:10 -0400
+Subject: Bluetooth: hci_sync: Fix using the same interval and window for Coded
+ PHY
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 53cb4197e63ab2363aa28c3029061e4d516e7626 ]
+
+Coded PHY recommended intervals are 3 times bigger than the 1M PHY so
+this aligns with that by multiplying by 3 the values given to 1M PHY
+since the code already used recommended values for that.
+
+Fixes: 288c90224eec ("Bluetooth: Enable all supported LE PHY by default")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_sync.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index f462e3fb5af05..1bc58b324b73e 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -2732,8 +2732,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
+                               if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
+                                       cp->scanning_phys |= LE_SCAN_PHY_CODED;
+                                       hci_le_scan_phy_params(phy, type,
+-                                                             interval,
+-                                                             window);
++                                                             interval * 3,
++                                                             window * 3);
+                                       num_phy++;
+                                       phy++;
+                               }
+@@ -2753,7 +2753,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
+       if (scan_coded(hdev)) {
+               cp->scanning_phys |= LE_SCAN_PHY_CODED;
+-              hci_le_scan_phy_params(phy, type, interval, window);
++              hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
+               num_phy++;
+               phy++;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-hci_sync-use-qos-to-determine-which-phy-to.patch b/queue-6.6/bluetooth-hci_sync-use-qos-to-determine-which-phy-to.patch
new file mode 100644 (file)
index 0000000..5339f7b
--- /dev/null
@@ -0,0 +1,125 @@
+From 0cf9f161469e445f5e79ca12e4d429991546d4e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Feb 2024 09:38:10 -0500
+Subject: Bluetooth: hci_sync: Use QoS to determine which PHY to scan
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 22cbf4f84c00da64196eb15034feee868e63eef0 ]
+
+This uses the hci_conn QoS to determine which PHY to scan when creating
+a PA Sync.
+
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Stable-dep-of: 53cb4197e63a ("Bluetooth: hci_sync: Fix using the same interval and window for Coded PHY")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_sync.c | 66 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 55 insertions(+), 11 deletions(-)
+
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index d6c0633bfe5bf..f462e3fb5af05 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -2679,6 +2679,14 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+       return filter_policy;
+ }
++static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp,
++                                 u8 type, u16 interval, u16 window)
++{
++      cp->type = type;
++      cp->interval = cpu_to_le16(interval);
++      cp->window = cpu_to_le16(window);
++}
++
+ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
+                                         u16 interval, u16 window,
+                                         u8 own_addr_type, u8 filter_policy)
+@@ -2686,7 +2694,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
+       struct hci_cp_le_set_ext_scan_params *cp;
+       struct hci_cp_le_scan_phy_params *phy;
+       u8 data[sizeof(*cp) + sizeof(*phy) * 2];
+-      u8 num_phy = 0;
++      u8 num_phy = 0x00;
+       cp = (void *)data;
+       phy = (void *)cp->data;
+@@ -2696,28 +2704,64 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
+       cp->own_addr_type = own_addr_type;
+       cp->filter_policy = filter_policy;
++      /* Check if PA Sync is in progress then select the PHY based on the
++       * hci_conn.iso_qos.
++       */
++      if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
++              struct hci_cp_le_add_to_accept_list *sent;
++
++              sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
++              if (sent) {
++                      struct hci_conn *conn;
++
++                      conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK,
++                                                     &sent->bdaddr);
++                      if (conn) {
++                              struct bt_iso_qos *qos = &conn->iso_qos;
++
++                              if (qos->bcast.in.phy & BT_ISO_PHY_1M ||
++                                  qos->bcast.in.phy & BT_ISO_PHY_2M) {
++                                      cp->scanning_phys |= LE_SCAN_PHY_1M;
++                                      hci_le_scan_phy_params(phy, type,
++                                                             interval,
++                                                             window);
++                                      num_phy++;
++                                      phy++;
++                              }
++
++                              if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
++                                      cp->scanning_phys |= LE_SCAN_PHY_CODED;
++                                      hci_le_scan_phy_params(phy, type,
++                                                             interval,
++                                                             window);
++                                      num_phy++;
++                                      phy++;
++                              }
++
++                              if (num_phy)
++                                      goto done;
++                      }
++              }
++      }
++
+       if (scan_1m(hdev) || scan_2m(hdev)) {
+               cp->scanning_phys |= LE_SCAN_PHY_1M;
+-
+-              phy->type = type;
+-              phy->interval = cpu_to_le16(interval);
+-              phy->window = cpu_to_le16(window);
+-
++              hci_le_scan_phy_params(phy, type, interval, window);
+               num_phy++;
+               phy++;
+       }
+       if (scan_coded(hdev)) {
+               cp->scanning_phys |= LE_SCAN_PHY_CODED;
+-
+-              phy->type = type;
+-              phy->interval = cpu_to_le16(interval);
+-              phy->window = cpu_to_le16(window);
+-
++              hci_le_scan_phy_params(phy, type, interval, window);
+               num_phy++;
+               phy++;
+       }
++done:
++      if (!num_phy)
++              return -EINVAL;
++
+       return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
+                                    sizeof(*cp) + sizeof(*phy) * num_phy,
+                                    data, HCI_CMD_TIMEOUT);
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-iso-align-broadcast-sync_timeout-with-conn.patch b/queue-6.6/bluetooth-iso-align-broadcast-sync_timeout-with-conn.patch
new file mode 100644 (file)
index 0000000..20ee385
--- /dev/null
@@ -0,0 +1,53 @@
+From c3bf7da2dcf9497c7798f05a4f4f3adec0cf2ded Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Mar 2024 11:58:17 -0500
+Subject: Bluetooth: ISO: Align broadcast sync_timeout with connection timeout
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 42ed95de82c01184a88945d3ca274be6a7ea607d ]
+
+This aligns broadcast sync_timeout with existing connection timeouts
+which are 20 seconds long.
+
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Stable-dep-of: b37cab587aa3 ("Bluetooth: ISO: Don't reject BT_ISO_QOS if parameters are unset")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/bluetooth.h | 2 ++
+ net/bluetooth/iso.c               | 4 ++--
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
+index aa90adc3b2a4d..28e32c9a6cc99 100644
+--- a/include/net/bluetooth/bluetooth.h
++++ b/include/net/bluetooth/bluetooth.h
+@@ -164,6 +164,8 @@ struct bt_voice {
+ #define BT_ISO_QOS_BIG_UNSET  0xff
+ #define BT_ISO_QOS_BIS_UNSET  0xff
++#define BT_ISO_SYNC_TIMEOUT   0x07d0 /* 20 secs */
++
+ struct bt_iso_io_qos {
+       __u32 interval;
+       __u16 latency;
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index 0eeec64801390..698d0b67c7ed4 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -764,10 +764,10 @@ static struct bt_iso_qos default_qos = {
+               .bcode                  = {0x00},
+               .options                = 0x00,
+               .skip                   = 0x0000,
+-              .sync_timeout           = 0x4000,
++              .sync_timeout           = BT_ISO_SYNC_TIMEOUT,
+               .sync_cte_type          = 0x00,
+               .mse                    = 0x00,
+-              .timeout                = 0x4000,
++              .timeout                = BT_ISO_SYNC_TIMEOUT,
+       },
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-iso-don-t-reject-bt_iso_qos-if-parameters-.patch b/queue-6.6/bluetooth-iso-don-t-reject-bt_iso_qos-if-parameters-.patch
new file mode 100644 (file)
index 0000000..59f9611
--- /dev/null
@@ -0,0 +1,57 @@
+From 8cfe88f24d4faeaf80baccb5d2d680dfb2d62e9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 15:43:18 -0400
+Subject: Bluetooth: ISO: Don't reject BT_ISO_QOS if parameters are unset
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit b37cab587aa3c9ab29c6b10aa55627dad713011f ]
+
+Consider certain values (0x00) as unset and load proper default if
+an application has not set them properly.
+
+Fixes: 0fe8c8d07134 ("Bluetooth: Split bt_iso_qos into dedicated structures")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/iso.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index 698d0b67c7ed4..2f63ea9e62ecd 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -1301,8 +1301,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
+ static bool check_bcast_qos(struct bt_iso_qos *qos)
+ {
+-      if (qos->bcast.sync_factor == 0x00)
+-              return false;
++      if (!qos->bcast.sync_factor)
++              qos->bcast.sync_factor = 0x01;
+       if (qos->bcast.packing > 0x01)
+               return false;
+@@ -1325,6 +1325,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
+       if (qos->bcast.skip > 0x01f3)
+               return false;
++      if (!qos->bcast.sync_timeout)
++              qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
++
+       if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
+               return false;
+@@ -1334,6 +1337,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
+       if (qos->bcast.mse > 0x1f)
+               return false;
++      if (!qos->bcast.timeout)
++              qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
++
+       if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
+               return false;
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-l2cap-don-t-double-set-the-hci_conn_mgmt_c.patch b/queue-6.6/bluetooth-l2cap-don-t-double-set-the-hci_conn_mgmt_c.patch
new file mode 100644 (file)
index 0000000..a62e4ca
--- /dev/null
@@ -0,0 +1,38 @@
+From fad6bb0fa41426ec7f532c9e33d28002b76897ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 18:50:23 +0800
+Subject: Bluetooth: l2cap: Don't double set the HCI_CONN_MGMT_CONNECTED bit
+
+From: Archie Pusaka <apusaka@chromium.org>
+
+[ Upstream commit 600b0bbe73d3a9a264694da0e4c2c0800309141e ]
+
+The bit is set and tested inside mgmt_device_connected(), therefore we
+must not set it just outside the function.
+
+Fixes: eeda1bf97bb5 ("Bluetooth: hci_event: Fix not indicating new connection for BIG Sync")
+Signed-off-by: Archie Pusaka <apusaka@chromium.org>
+Reviewed-by: Manish Mandlik <mmandlik@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/l2cap_core.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index ab5a9d42fae71..706d2478ddb33 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
+               return -EPROTO;
+       hci_dev_lock(hdev);
+-      if (hci_dev_test_flag(hdev, HCI_MGMT) &&
+-          !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
++      if (hci_dev_test_flag(hdev, HCI_MGMT))
+               mgmt_device_connected(hdev, hcon, NULL, 0);
+       hci_dev_unlock(hdev);
+-- 
+2.43.0
+
diff --git a/queue-6.6/bluetooth-sco-fix-not-validating-setsockopt-user-inp.patch b/queue-6.6/bluetooth-sco-fix-not-validating-setsockopt-user-inp.patch
new file mode 100644 (file)
index 0000000..7c422ce
--- /dev/null
@@ -0,0 +1,122 @@
+From 6213cc59a1257817a2706aeb2915cd2488e9c2e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 15:41:52 -0400
+Subject: Bluetooth: SCO: Fix not validating setsockopt user input
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 51eda36d33e43201e7a4fd35232e069b2c850b01 ]
+
+syzbot reported sco_sock_setsockopt() is copying data without
+checking user input length.
+
+BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset
+include/linux/sockptr.h:49 [inline]
+BUG: KASAN: slab-out-of-bounds in copy_from_sockptr
+include/linux/sockptr.h:55 [inline]
+BUG: KASAN: slab-out-of-bounds in sco_sock_setsockopt+0xc0b/0xf90
+net/bluetooth/sco.c:893
+Read of size 4 at addr ffff88805f7b15a3 by task syz-executor.5/12578
+
+Fixes: ad10b1a48754 ("Bluetooth: Add Bluetooth socket voice option")
+Fixes: b96e9c671b05 ("Bluetooth: Add BT_DEFER_SETUP option to sco socket")
+Fixes: 00398e1d5183 ("Bluetooth: Add support for BT_PKT_STATUS CMSG data for SCO connections")
+Fixes: f6873401a608 ("Bluetooth: Allow setting of codec for HFP offload use case")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/bluetooth.h |  9 +++++++++
+ net/bluetooth/sco.c               | 23 ++++++++++-------------
+ 2 files changed, 19 insertions(+), 13 deletions(-)
+
+diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
+index 28e32c9a6cc99..e4a6831133f81 100644
+--- a/include/net/bluetooth/bluetooth.h
++++ b/include/net/bluetooth/bluetooth.h
+@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
+       return skb;
+ }
++static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
++                                     sockptr_t src, size_t src_size)
++{
++      if (dst_size > src_size)
++              return -EINVAL;
++
++      return copy_from_sockptr(dst, src, dst_size);
++}
++
+ int bt_to_errno(u16 code);
+ __u8 bt_status(int err);
+diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
+index c736186aba26b..8e4f39b8601cb 100644
+--- a/net/bluetooth/sco.c
++++ b/net/bluetooth/sco.c
+@@ -823,7 +823,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+                              sockptr_t optval, unsigned int optlen)
+ {
+       struct sock *sk = sock->sk;
+-      int len, err = 0;
++      int err = 0;
+       struct bt_voice voice;
+       u32 opt;
+       struct bt_codecs *codecs;
+@@ -842,10 +842,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+                       break;
+               }
+-              if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-                      err = -EFAULT;
++              err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
++              if (err)
+                       break;
+-              }
+               if (opt)
+                       set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+@@ -862,11 +861,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+               voice.setting = sco_pi(sk)->setting;
+-              len = min_t(unsigned int, sizeof(voice), optlen);
+-              if (copy_from_sockptr(&voice, optval, len)) {
+-                      err = -EFAULT;
++              err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
++                                         optlen);
++              if (err)
+                       break;
+-              }
+               /* Explicitly check for these values */
+               if (voice.setting != BT_VOICE_TRANSPARENT &&
+@@ -889,10 +887,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+               break;
+       case BT_PKT_STATUS:
+-              if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-                      err = -EFAULT;
++              err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
++              if (err)
+                       break;
+-              }
+               if (opt)
+                       set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+@@ -933,9 +930,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+                       break;
+               }
+-              if (copy_from_sockptr(buffer, optval, optlen)) {
++              err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
++              if (err) {
+                       hci_dev_put(hdev);
+-                      err = -EFAULT;
+                       break;
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-fix-error-recovery-for-roce-ulp-client.patch b/queue-6.6/bnxt_en-fix-error-recovery-for-roce-ulp-client.patch
new file mode 100644 (file)
index 0000000..19d81cc
--- /dev/null
@@ -0,0 +1,41 @@
+From 2bffb6e16f938bd2eb89a277efd573534dd0d170 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 16:55:12 -0700
+Subject: bnxt_en: Fix error recovery for RoCE ulp client
+
+From: Vikas Gupta <vikas.gupta@broadcom.com>
+
+[ Upstream commit b5ea7d33ba2a42b95b4298d08d2af9cdeeaf0090 ]
+
+Since runtime MSIXs vector allocation/free has been removed,
+the L2 driver needs to repopulate the MSIX entries for the
+ulp client as the irq table may change during the recovery
+process.
+
+Fixes: 303432211324 ("bnxt_en: Remove runtime interrupt vector allocation")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+index 7188ea81401de..7689086371e03 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+@@ -213,6 +213,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
+       if (err)
+               return;
++      if (edev->ulp_tbl->msix_requested)
++              bnxt_fill_msix_vecs(bp, edev->msix_entries);
++
+       if (aux_priv) {
+               struct auxiliary_device *adev;
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-fix-possible-memory-leak-in-bnxt_rdma_aux_de.patch b/queue-6.6/bnxt_en-fix-possible-memory-leak-in-bnxt_rdma_aux_de.patch
new file mode 100644 (file)
index 0000000..0ff74d5
--- /dev/null
@@ -0,0 +1,45 @@
+From a01c9a53f75ead091cef6f80a8c0f1490ec00d9a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 16:55:11 -0700
+Subject: bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()
+
+From: Vikas Gupta <vikas.gupta@broadcom.com>
+
+[ Upstream commit 7ac10c7d728d75bc9daaa8fade3c7a3273b9a9ff ]
+
+If ulp = kzalloc() fails, the allocated edev will leak because it is
+not properly assigned and the cleanup path will not be able to free it.
+Fix it by assigning it properly immediately after allocation.
+
+Fixes: 303432211324 ("bnxt_en: Remove runtime interrupt vector allocation")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+index 6ba2b93986333..7188ea81401de 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+@@ -394,12 +394,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
+       if (!edev)
+               goto aux_dev_uninit;
++      aux_priv->edev = edev;
++
+       ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
+       if (!ulp)
+               goto aux_dev_uninit;
+       edev->ulp_tbl = ulp;
+-      aux_priv->edev = edev;
+       bp->edev = edev;
+       bnxt_set_edev_info(edev, bp);
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-reset-ptp-tx_avail-after-possible-firmware-r.patch b/queue-6.6/bnxt_en-reset-ptp-tx_avail-after-possible-firmware-r.patch
new file mode 100644 (file)
index 0000000..972cc4a
--- /dev/null
@@ -0,0 +1,42 @@
+From 6f6e433a1015a60f70aa740825aa3bea3af24b97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 16:55:13 -0700
+Subject: bnxt_en: Reset PTP tx_avail after possible firmware reset
+
+From: Pavan Chebbi <pavan.chebbi@broadcom.com>
+
+[ Upstream commit faa12ca245585379d612736a4b5e98e88481ea59 ]
+
+It is possible that during error recovery and firmware reset,
+there is a pending TX PTP packet waiting for the timestamp.
+We need to reset this condition so that after recovery, the
+tx_avail count for PTP is reset back to the initial value.
+Otherwise, we may not accept any PTP TX timestamps after
+recovery.
+
+Fixes: 118612d519d8 ("bnxt_en: Add PTP clock APIs, ioctls, and ethtool methods")
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index dac4f9510c173..38e3b2225ff1c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -10549,6 +10549,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
+       /* VF-reps may need to be re-opened after the PF is re-opened */
+       if (BNXT_PF(bp))
+               bnxt_vf_reps_open(bp);
++      if (bp->ptp_cfg)
++              atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
+       bnxt_ptp_init_rtc(bp, true);
+       bnxt_ptp_cfg_tstamp_filters(bp);
+       return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.6/cxl-core-fix-initialization-of-mbox_cmd.size_out-in-.patch b/queue-6.6/cxl-core-fix-initialization-of-mbox_cmd.size_out-in-.patch
new file mode 100644 (file)
index 0000000..7c0f5d3
--- /dev/null
@@ -0,0 +1,61 @@
+From 500d41fbc0228d461a81ab447f81b76ee50dc0de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Apr 2024 17:14:03 +0900
+Subject: cxl/core: Fix initialization of mbox_cmd.size_out in get event
+
+From: Kwangjin Ko <kwangjin.ko@sk.com>
+
+[ Upstream commit f7c52345ccc96343c0a05bdea3121c8ac7b67d5f ]
+
+Since mbox_cmd.size_out is overwritten with the actual output size in
+the function below, it needs to be initialized every time.
+
+cxl_internal_send_cmd -> __cxl_pci_mbox_send_cmd
+
+Problem scenario:
+
+1) The size_out variable is initially set to the size of the mailbox.
+2) Read an event.
+   - size_out is set to 160 bytes (header 32B + one event 128B).
+   - Two events are created while reading.
+3) Read the new *two* events.
+   - size_out is still set to 160 bytes.
+   - Although the value of out_len is 288 bytes, only 160 bytes are
+     copied from the mailbox register to the local variable.
+   - record_count is set to 2.
+   - Accessing records[1] will result in reading incorrect data.
+
+Fixes: 6ebe28f9ec72 ("cxl/mem: Read, trace, and clear events on driver load")
+Tested-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Kwangjin Ko <kwangjin.ko@sk.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/mbox.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
+index e5f3592e54191..4b4c15e943380 100644
+--- a/drivers/cxl/core/mbox.c
++++ b/drivers/cxl/core/mbox.c
+@@ -971,13 +971,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
+               .payload_in = &log_type,
+               .size_in = sizeof(log_type),
+               .payload_out = payload,
+-              .size_out = mds->payload_size,
+               .min_out = struct_size(payload, records, 0),
+       };
+       do {
+               int rc, i;
++              mbox_cmd.size_out = mds->payload_size;
++
+               rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+               if (rc) {
+                       dev_err_ratelimited(dev,
+-- 
+2.43.0
+
diff --git a/queue-6.6/cxl-core-regs-fix-usage-of-map-reg_type-in-cxl_decod.patch b/queue-6.6/cxl-core-regs-fix-usage-of-map-reg_type-in-cxl_decod.patch
new file mode 100644 (file)
index 0000000..b544523
--- /dev/null
@@ -0,0 +1,53 @@
+From 60c9e87bf6a849b299312c78d06f055fb08283f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Mar 2024 11:15:08 -0700
+Subject: cxl/core/regs: Fix usage of map->reg_type in cxl_decode_regblock()
+ before assigned
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+[ Upstream commit 5c88a9ccd4c431d58b532e4158b6999a8350062c ]
+
+In the error path, map->reg_type is used in a kernel warning
+before its value is set up. Found by code inspection. The user-visible
+effect is a wrong reg_type being emitted via the kernel log. Use a local
+variable for reg_type and compute its value up front.
+
+Fixes: 6c7f4f1e51c2 ("cxl/core/regs: Make cxl_map_{component, device}_regs() device generic")
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/regs.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
+index e0fbe964f6f0a..bab4592db647f 100644
+--- a/drivers/cxl/core/regs.c
++++ b/drivers/cxl/core/regs.c
+@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
+ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
+                               struct cxl_register_map *map)
+ {
++      u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+       int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+       u64 offset = ((u64)reg_hi << 32) |
+                    (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
+@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
+       if (offset > pci_resource_len(pdev, bar)) {
+               dev_warn(&pdev->dev,
+                        "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
+-                       &pdev->resource[bar], &offset, map->reg_type);
++                       &pdev->resource[bar], &offset, reg_type);
+               return false;
+       }
+-      map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
++      map->reg_type = reg_type;
+       map->resource = pci_resource_start(pdev, bar) + offset;
+       map->max_size = pci_resource_len(pdev, bar) - offset;
+       return true;
+-- 
+2.43.0
+
diff --git a/queue-6.6/cxl-mem-fix-for-the-index-of-clear-event-record-hand.patch b/queue-6.6/cxl-mem-fix-for-the-index-of-clear-event-record-hand.patch
new file mode 100644 (file)
index 0000000..48af89b
--- /dev/null
@@ -0,0 +1,42 @@
+From 358944fe4cceb582a5082ba5b02eebbc28eb9ca4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Mar 2024 10:29:28 +0800
+Subject: cxl/mem: Fix for the index of Clear Event Record Handle
+
+From: Yuquan Wang <wangyuquan1236@phytium.com.cn>
+
+[ Upstream commit b7c59b038c656214f56432867056997c2e0fc268 ]
+
+The dev_dbg info for Clear Event Records mailbox command would report
+the handle of the next record to clear not the current one.
+
+This was because the index 'i' had incremented before printing the
+current handle value.
+
+Fixes: 6ebe28f9ec72 ("cxl/mem: Read, trace, and clear events on driver load")
+Signed-off-by: Yuquan Wang <wangyuquan1236@phytium.com.cn>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Fan Ni <fan.ni@samsung.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/mbox.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
+index b12986b968da4..e5f3592e54191 100644
+--- a/drivers/cxl/core/mbox.c
++++ b/drivers/cxl/core/mbox.c
+@@ -928,7 +928,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
+       for (cnt = 0; cnt < total; cnt++) {
+               payload->handles[i++] = get_pl->records[cnt].hdr.handle;
+               dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
+-                      le16_to_cpu(payload->handles[i]));
++                      le16_to_cpu(payload->handles[i - 1]));
+               if (i == max_handles) {
+                       payload->nr_recs = i;
+-- 
+2.43.0
+
diff --git a/queue-6.6/drm-msm-dpu-don-t-allow-overriding-data-from-catalog.patch b/queue-6.6/drm-msm-dpu-don-t-allow-overriding-data-from-catalog.patch
new file mode 100644 (file)
index 0000000..5d16f6e
--- /dev/null
@@ -0,0 +1,53 @@
+From 038840c902011b7a48fb647961714a2ca5a04929 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Mar 2024 03:10:41 +0200
+Subject: drm/msm/dpu: don't allow overriding data from catalog
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit 4f3b77ae5ff5b5ba9d99c5d5450db388dbee5107 ]
+
+The data from catalog is marked as const, so it is a part of the RO
+segment. Allowing userspace to write to it through debugfs can cause
+protection faults. Set debugfs file mode to read-only for debug entries
+corresponding to perf_cfg coming from catalog.
+
+Fixes: abda0d925f9c ("drm/msm/dpu: Mark various data tables as const")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
+Patchwork: https://patchwork.freedesktop.org/patch/582844/
+Link: https://lore.kernel.org/r/20240314-dpu-perf-rework-v3-1-79fa4e065574@linaro.org
+Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+index ef871239adb2a..68fae048a9a83 100644
+--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
+                       &perf->core_clk_rate);
+       debugfs_create_u32("enable_bw_release", 0600, entry,
+                       (u32 *)&perf->enable_bw_release);
+-      debugfs_create_u32("threshold_low", 0600, entry,
++      debugfs_create_u32("threshold_low", 0400, entry,
+                       (u32 *)&perf->perf_cfg->max_bw_low);
+-      debugfs_create_u32("threshold_high", 0600, entry,
++      debugfs_create_u32("threshold_high", 0400, entry,
+                       (u32 *)&perf->perf_cfg->max_bw_high);
+-      debugfs_create_u32("min_core_ib", 0600, entry,
++      debugfs_create_u32("min_core_ib", 0400, entry,
+                       (u32 *)&perf->perf_cfg->min_core_ib);
+-      debugfs_create_u32("min_llcc_ib", 0600, entry,
++      debugfs_create_u32("min_llcc_ib", 0400, entry,
+                       (u32 *)&perf->perf_cfg->min_llcc_ib);
+-      debugfs_create_u32("min_dram_ib", 0600, entry,
++      debugfs_create_u32("min_dram_ib", 0400, entry,
+                       (u32 *)&perf->perf_cfg->min_dram_ib);
+       debugfs_create_file("perf_mode", 0600, entry,
+                       (u32 *)perf, &dpu_core_perf_mode_fops);
+-- 
+2.43.0
+
diff --git a/queue-6.6/firmware-arm_scmi-make-raw-debugfs-entries-non-seeka.patch b/queue-6.6/firmware-arm_scmi-make-raw-debugfs-entries-non-seeka.patch
new file mode 100644 (file)
index 0000000..f56c955
--- /dev/null
@@ -0,0 +1,80 @@
+From af56df9eaa041fe41ed2ed09f2ed2cc717e4051f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Mar 2024 14:03:24 +0000
+Subject: firmware: arm_scmi: Make raw debugfs entries non-seekable
+
+From: Cristian Marussi <cristian.marussi@arm.com>
+
+[ Upstream commit b70c7996d4ffb2e02895132e8a79a37cee66504f ]
+
+SCMI raw debugfs entries are used to inject and snoop messages out of the
+SCMI core and, as such, the underlying virtual files have no reason to
+support seeking.
+
+Modify the related file_operations descriptors to be non-seekable.
+
+Fixes: 3c3d818a9317 ("firmware: arm_scmi: Add core raw transmission support")
+Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
+Link: https://lore.kernel.org/r/20240315140324.231830-1-cristian.marussi@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/raw_mode.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
+index 3505735185033..130d13e9cd6be 100644
+--- a/drivers/firmware/arm_scmi/raw_mode.c
++++ b/drivers/firmware/arm_scmi/raw_mode.c
+@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
+       rd->raw = raw;
+       filp->private_data = rd;
+-      return 0;
++      return nonseekable_open(inode, filp);
+ }
+ static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
+@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
+       .open = scmi_dbg_raw_mode_open,
+       .release = scmi_dbg_raw_mode_release,
+       .write = scmi_dbg_raw_mode_reset_write,
++      .llseek = no_llseek,
+       .owner = THIS_MODULE,
+ };
+@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
+       .read = scmi_dbg_raw_mode_message_read,
+       .write = scmi_dbg_raw_mode_message_write,
+       .poll = scmi_dbg_raw_mode_message_poll,
++      .llseek = no_llseek,
+       .owner = THIS_MODULE,
+ };
+@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
+       .read = scmi_dbg_raw_mode_message_read,
+       .write = scmi_dbg_raw_mode_message_async_write,
+       .poll = scmi_dbg_raw_mode_message_poll,
++      .llseek = no_llseek,
+       .owner = THIS_MODULE,
+ };
+@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
+       .release = scmi_dbg_raw_mode_release,
+       .read = scmi_test_dbg_raw_mode_notif_read,
+       .poll = scmi_test_dbg_raw_mode_notif_poll,
++      .llseek = no_llseek,
+       .owner = THIS_MODULE,
+ };
+@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
+       .release = scmi_dbg_raw_mode_release,
+       .read = scmi_test_dbg_raw_mode_errors_read,
+       .poll = scmi_test_dbg_raw_mode_errors_poll,
++      .llseek = no_llseek,
+       .owner = THIS_MODULE,
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch b/queue-6.6/geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch
new file mode 100644 (file)
index 0000000..96b89f1
--- /dev/null
@@ -0,0 +1,166 @@
+From dad1c0552b97fbf0b9b8bb57e9def80f27f63faa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 10:30:34 +0000
+Subject: geneve: fix header validation in geneve[6]_xmit_skb
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d8a6213d70accb403b82924a1c229e733433a5ef ]
+
+syzbot is able to trigger an uninit-value in geneve_xmit() [1]
+
+Problem : While most ip tunnel helpers (like ip_tunnel_get_dsfield())
+use skb_protocol(skb, true), pskb_inet_may_pull() is only using
+skb->protocol.
+
+If anything other than ETH_P_IPV6 or ETH_P_IP is found in skb->protocol,
+pskb_inet_may_pull() does nothing at all.
+
+If a vlan tag was provided by the caller (af_packet in the syzbot case),
+the network header might not point to the correct location, and skb
+linear part could be smaller than expected.
+
+Add skb_vlan_inet_prepare() to perform a complete mac validation.
+
+Use this in geneve for the moment, I suspect we need to adopt this
+more broadly.
+
+v4 - Jakub reported v3 broke l2_tos_ttl_inherit.sh selftest
+   - Only call __vlan_get_protocol() for vlan types.
+Link: https://lore.kernel.org/netdev/20240404100035.3270a7d5@kernel.org/
+
+v2,v3 - Addressed Sabrina comments on v1 and v2
+Link: https://lore.kernel.org/netdev/Zg1l9L2BNoZWZDZG@hog/
+
+[1]
+
+BUG: KMSAN: uninit-value in geneve_xmit_skb drivers/net/geneve.c:910 [inline]
+ BUG: KMSAN: uninit-value in geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030
+  geneve_xmit_skb drivers/net/geneve.c:910 [inline]
+  geneve_xmit+0x302d/0x5420 drivers/net/geneve.c:1030
+  __netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+  netdev_start_xmit include/linux/netdevice.h:4917 [inline]
+  xmit_one net/core/dev.c:3531 [inline]
+  dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3547
+  __dev_queue_xmit+0x348d/0x52c0 net/core/dev.c:4335
+  dev_queue_xmit include/linux/netdevice.h:3091 [inline]
+  packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276
+  packet_snd net/packet/af_packet.c:3081 [inline]
+  packet_sendmsg+0x8bb0/0x9ef0 net/packet/af_packet.c:3113
+  sock_sendmsg_nosec net/socket.c:730 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:745
+  __sys_sendto+0x685/0x830 net/socket.c:2191
+  __do_sys_sendto net/socket.c:2203 [inline]
+  __se_sys_sendto net/socket.c:2199 [inline]
+  __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199
+ do_syscall_64+0xd5/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:3804 [inline]
+  slab_alloc_node mm/slub.c:3845 [inline]
+  kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577
+  __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668
+  alloc_skb include/linux/skbuff.h:1318 [inline]
+  alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504
+  sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795
+  packet_alloc_skb net/packet/af_packet.c:2930 [inline]
+  packet_snd net/packet/af_packet.c:3024 [inline]
+  packet_sendmsg+0x722d/0x9ef0 net/packet/af_packet.c:3113
+  sock_sendmsg_nosec net/socket.c:730 [inline]
+  __sock_sendmsg+0x30f/0x380 net/socket.c:745
+  __sys_sendto+0x685/0x830 net/socket.c:2191
+  __do_sys_sendto net/socket.c:2203 [inline]
+  __se_sys_sendto net/socket.c:2199 [inline]
+  __x64_sys_sendto+0x125/0x1d0 net/socket.c:2199
+ do_syscall_64+0xd5/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+CPU: 0 PID: 5033 Comm: syz-executor346 Not tainted 6.9.0-rc1-syzkaller-00005-g928a87efa423 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024
+
+Fixes: d13f048dd40e ("net: geneve: modify IP header check in geneve6_xmit_skb and geneve_xmit_skb")
+Reported-by: syzbot+9ee20ec1de7b3168db09@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/000000000000d19c3a06152f9ee4@google.com/
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Phillip Potter <phil@philpotter.co.uk>
+Cc: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Phillip Potter <phil@philpotter.co.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/geneve.c     |  4 ++--
+ include/net/ip_tunnels.h | 33 +++++++++++++++++++++++++++++++++
+ 2 files changed, 35 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 0204ac92b05ab..0a18b67d0d669 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -926,7 +926,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+       __be16 sport;
+       int err;
+-      if (!pskb_inet_may_pull(skb))
++      if (!skb_vlan_inet_prepare(skb))
+               return -EINVAL;
+       sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+@@ -1024,7 +1024,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+       __be16 sport;
+       int err;
+-      if (!pskb_inet_may_pull(skb))
++      if (!skb_vlan_inet_prepare(skb))
+               return -EINVAL;
+       sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
+index f346b4efbc307..822f0fad39623 100644
+--- a/include/net/ip_tunnels.h
++++ b/include/net/ip_tunnels.h
+@@ -360,6 +360,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
+       return pskb_network_may_pull(skb, nhlen);
+ }
++/* Variant of pskb_inet_may_pull().
++ */
++static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
++{
++      int nhlen = 0, maclen = ETH_HLEN;
++      __be16 type = skb->protocol;
++
++      /* Essentially this is skb_protocol(skb, true)
++       * And we get MAC len.
++       */
++      if (eth_type_vlan(type))
++              type = __vlan_get_protocol(skb, type, &maclen);
++
++      switch (type) {
++#if IS_ENABLED(CONFIG_IPV6)
++      case htons(ETH_P_IPV6):
++              nhlen = sizeof(struct ipv6hdr);
++              break;
++#endif
++      case htons(ETH_P_IP):
++              nhlen = sizeof(struct iphdr);
++              break;
++      }
++      /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
++       * a base network header in skb->head.
++       */
++      if (!pskb_may_pull(skb, maclen + nhlen))
++              return false;
++
++      skb_set_network_header(skb, maclen);
++      return true;
++}
++
+ static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
+ {
+       const struct ip_tunnel_encap_ops *ops;
+-- 
+2.43.0
+
diff --git a/queue-6.6/iommu-vt-d-allocate-local-memory-for-page-request-qu.patch b/queue-6.6/iommu-vt-d-allocate-local-memory-for-page-request-qu.patch
new file mode 100644 (file)
index 0000000..58ae3b3
--- /dev/null
@@ -0,0 +1,39 @@
+From 4e6099c26caac162bc3a53ac615eee262d5c8534 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Apr 2024 11:07:43 +0800
+Subject: iommu/vt-d: Allocate local memory for page request queue
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+[ Upstream commit a34f3e20ddff02c4f12df2c0635367394e64c63d ]
+
+The page request queue is per IOMMU, its allocation should be made
+NUMA-aware for performance reasons.
+
+Fixes: a222a7f0bb6c ("iommu/vt-d: Implement page request handling")
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Link: https://lore.kernel.org/r/20240403214007.985600-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/svm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
+index ac12f76c1212a..6010b93c514c5 100644
+--- a/drivers/iommu/intel/svm.c
++++ b/drivers/iommu/intel/svm.c
+@@ -67,7 +67,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
+       struct page *pages;
+       int irq, ret;
+-      pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
++      pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+       if (!pages) {
+               pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
+                       iommu->name);
+-- 
+2.43.0
+
diff --git a/queue-6.6/iommu-vt-d-fix-wrong-use-of-pasid-config.patch b/queue-6.6/iommu-vt-d-fix-wrong-use-of-pasid-config.patch
new file mode 100644 (file)
index 0000000..5ce1e37
--- /dev/null
@@ -0,0 +1,39 @@
+From 1cb1a4017512a4e8d4c8ab33e7a7702cc561dab9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Apr 2024 11:07:42 +0800
+Subject: iommu/vt-d: Fix wrong use of pasid config
+
+From: Xuchun Shang <xuchun.shang@linux.alibaba.com>
+
+[ Upstream commit 5b3625a4f6422e8982f90f0c11b5546149c962b8 ]
+
+The commit "iommu/vt-d: Add IOMMU perfmon support" introduced the IOMMU
+PMU feature, but uses the wrong config when setting the pasid filter.
+
+Fixes: 7232ab8b89e9 ("iommu/vt-d: Add IOMMU perfmon support")
+Signed-off-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Link: https://lore.kernel.org/r/20240401060753.3321318-1-xuchun.shang@linux.alibaba.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/perfmon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
+index cf43e798eca49..44083d01852db 100644
+--- a/drivers/iommu/intel/perfmon.c
++++ b/drivers/iommu/intel/perfmon.c
+@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
+       iommu_pmu_set_filter(domain, event->attr.config1,
+                            IOMMU_PMU_FILTER_DOMAIN, idx,
+                            event->attr.config1);
+-      iommu_pmu_set_filter(pasid, event->attr.config1,
++      iommu_pmu_set_filter(pasid, event->attr.config2,
+                            IOMMU_PMU_FILTER_PASID, idx,
+                            event->attr.config1);
+       iommu_pmu_set_filter(ats, event->attr.config2,
+-- 
+2.43.0
+
diff --git a/queue-6.6/ipv4-route-avoid-unused-but-set-variable-warning.patch b/queue-6.6/ipv4-route-avoid-unused-but-set-variable-warning.patch
new file mode 100644 (file)
index 0000000..3b8cf0b
--- /dev/null
@@ -0,0 +1,51 @@
+From 26f3138bac441342d96dd6d0b282800e022368b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Apr 2024 09:42:03 +0200
+Subject: ipv4/route: avoid unused-but-set-variable warning
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit cf1b7201df59fb936f40f4a807433fe3f2ce310a ]
+
+The log_martians variable is only used in an #ifdef, causing a 'make W=1'
+warning with gcc:
+
+net/ipv4/route.c: In function 'ip_rt_send_redirect':
+net/ipv4/route.c:880:13: error: variable 'log_martians' set but not used [-Werror=unused-but-set-variable]
+
+Change the #ifdef to an equivalent IS_ENABLED() to let the compiler
+see where the variable is used.
+
+Fixes: 30038fc61adf ("net: ip_rt_send_redirect() optimization")
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240408074219.3030256-2-arnd@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 3bad9aa066db3..e1e30c09a1753 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
+               icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
+               peer->rate_last = jiffies;
+               ++peer->n_redirects;
+-#ifdef CONFIG_IP_ROUTE_VERBOSE
+-              if (log_martians &&
++              if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
+                   peer->n_redirects == ip_rt_redirect_number)
+                       net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
+                                            &ip_hdr(skb)->saddr, inet_iif(skb),
+                                            &ip_hdr(skb)->daddr, &gw);
+-#endif
+       }
+ out_put_peer:
+       inet_putpeer(peer);
+-- 
+2.43.0
+
diff --git a/queue-6.6/ipv6-fib-hide-unused-pn-variable.patch b/queue-6.6/ipv6-fib-hide-unused-pn-variable.patch
new file mode 100644 (file)
index 0000000..6849897
--- /dev/null
@@ -0,0 +1,60 @@
+From 4a8aa0c03f5a5597c9c33c29064a7a5e928ed7af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Apr 2024 09:42:02 +0200
+Subject: ipv6: fib: hide unused 'pn' variable
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 74043489fcb5e5ca4074133582b5b8011b67f9e7 ]
+
+When CONFIG_IPV6_SUBTREES is disabled, the only user is hidden, causing
+a 'make W=1' warning:
+
+net/ipv6/ip6_fib.c: In function 'fib6_add':
+net/ipv6/ip6_fib.c:1388:32: error: variable 'pn' set but not used [-Werror=unused-but-set-variable]
+
+Add another #ifdef around the variable declaration, matching the other
+uses in this file.
+
+Fixes: 66729e18df08 ("[IPV6] ROUTE: Make sure we have fn->leaf when adding a node on subtree.")
+Link: https://lore.kernel.org/netdev/20240322131746.904943-1-arnd@kernel.org/
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240408074219.3030256-1-arnd@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_fib.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index 54294f6a8ec51..8184076a3924e 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1375,7 +1375,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+            struct nl_info *info, struct netlink_ext_ack *extack)
+ {
+       struct fib6_table *table = rt->fib6_table;
+-      struct fib6_node *fn, *pn = NULL;
++      struct fib6_node *fn;
++#ifdef CONFIG_IPV6_SUBTREES
++      struct fib6_node *pn = NULL;
++#endif
+       int err = -ENOMEM;
+       int allow_create = 1;
+       int replace_required = 0;
+@@ -1399,9 +1402,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
+               goto out;
+       }
++#ifdef CONFIG_IPV6_SUBTREES
+       pn = fn;
+-#ifdef CONFIG_IPV6_SUBTREES
+       if (rt->fib6_src.plen) {
+               struct fib6_node *sn;
+-- 
+2.43.0
+
diff --git a/queue-6.6/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch b/queue-6.6/ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch
new file mode 100644 (file)
index 0000000..323a407
--- /dev/null
@@ -0,0 +1,133 @@
+From 8910799465bd58baea5447bf815f3a72007e7cb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Apr 2024 16:18:21 +0200
+Subject: ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr
+
+From: Jiri Benc <jbenc@redhat.com>
+
+[ Upstream commit 7633c4da919ad51164acbf1aa322cc1a3ead6129 ]
+
+Although ipv6_get_ifaddr walks inet6_addr_lst under the RCU lock, it
+still means hlist_for_each_entry_rcu can return an item that got removed
+from the list. The memory itself of such item is not freed thanks to RCU
+but nothing guarantees the actual content of the memory is sane.
+
+In particular, the reference count can be zero. This can happen if
+ipv6_del_addr is called in parallel. ipv6_del_addr removes the entry
+from inet6_addr_lst (hlist_del_init_rcu(&ifp->addr_lst)) and drops all
+references (__in6_ifa_put(ifp) + in6_ifa_put(ifp)). With bad enough
+timing, this can happen:
+
+1. In ipv6_get_ifaddr, hlist_for_each_entry_rcu returns an entry.
+
+2. Then, the whole ipv6_del_addr is executed for the given entry. The
+   reference count drops to zero and kfree_rcu is scheduled.
+
+3. ipv6_get_ifaddr continues and tries to increment the reference count
+   (in6_ifa_hold).
+
+4. The rcu is unlocked and the entry is freed.
+
+5. The freed entry is returned.
+
+Prevent increasing of the reference count in such case. The name
+in6_ifa_hold_safe is chosen to mimic the existing fib6_info_hold_safe.
+
+[   41.506330] refcount_t: addition on 0; use-after-free.
+[   41.506760] WARNING: CPU: 0 PID: 595 at lib/refcount.c:25 refcount_warn_saturate+0xa5/0x130
+[   41.507413] Modules linked in: veth bridge stp llc
+[   41.507821] CPU: 0 PID: 595 Comm: python3 Not tainted 6.9.0-rc2.main-00208-g49563be82afa #14
+[   41.508479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+[   41.509163] RIP: 0010:refcount_warn_saturate+0xa5/0x130
+[   41.509586] Code: ad ff 90 0f 0b 90 90 c3 cc cc cc cc 80 3d c0 30 ad 01 00 75 a0 c6 05 b7 30 ad 01 01 90 48 c7 c7 38 cc 7a 8c e8 cc 18 ad ff 90 <0f> 0b 90 90 c3 cc cc cc cc 80 3d 98 30 ad 01 00 0f 85 75 ff ff ff
+[   41.510956] RSP: 0018:ffffbda3c026baf0 EFLAGS: 00010282
+[   41.511368] RAX: 0000000000000000 RBX: ffff9e9c46914800 RCX: 0000000000000000
+[   41.511910] RDX: ffff9e9c7ec29c00 RSI: ffff9e9c7ec1c900 RDI: ffff9e9c7ec1c900
+[   41.512445] RBP: ffff9e9c43660c9c R08: 0000000000009ffb R09: 00000000ffffdfff
+[   41.512998] R10: 00000000ffffdfff R11: ffffffff8ca58a40 R12: ffff9e9c4339a000
+[   41.513534] R13: 0000000000000001 R14: ffff9e9c438a0000 R15: ffffbda3c026bb48
+[   41.514086] FS:  00007fbc4cda1740(0000) GS:ffff9e9c7ec00000(0000) knlGS:0000000000000000
+[   41.514726] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   41.515176] CR2: 000056233b337d88 CR3: 000000000376e006 CR4: 0000000000370ef0
+[   41.515713] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   41.516252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[   41.516799] Call Trace:
+[   41.517037]  <TASK>
+[   41.517249]  ? __warn+0x7b/0x120
+[   41.517535]  ? refcount_warn_saturate+0xa5/0x130
+[   41.517923]  ? report_bug+0x164/0x190
+[   41.518240]  ? handle_bug+0x3d/0x70
+[   41.518541]  ? exc_invalid_op+0x17/0x70
+[   41.520972]  ? asm_exc_invalid_op+0x1a/0x20
+[   41.521325]  ? refcount_warn_saturate+0xa5/0x130
+[   41.521708]  ipv6_get_ifaddr+0xda/0xe0
+[   41.522035]  inet6_rtm_getaddr+0x342/0x3f0
+[   41.522376]  ? __pfx_inet6_rtm_getaddr+0x10/0x10
+[   41.522758]  rtnetlink_rcv_msg+0x334/0x3d0
+[   41.523102]  ? netlink_unicast+0x30f/0x390
+[   41.523445]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
+[   41.523832]  netlink_rcv_skb+0x53/0x100
+[   41.524157]  netlink_unicast+0x23b/0x390
+[   41.524484]  netlink_sendmsg+0x1f2/0x440
+[   41.524826]  __sys_sendto+0x1d8/0x1f0
+[   41.525145]  __x64_sys_sendto+0x1f/0x30
+[   41.525467]  do_syscall_64+0xa5/0x1b0
+[   41.525794]  entry_SYSCALL_64_after_hwframe+0x72/0x7a
+[   41.526213] RIP: 0033:0x7fbc4cfcea9a
+[   41.526528] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89
+[   41.527942] RSP: 002b:00007ffcf54012a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+[   41.528593] RAX: ffffffffffffffda RBX: 00007ffcf5401368 RCX: 00007fbc4cfcea9a
+[   41.529173] RDX: 000000000000002c RSI: 00007fbc4b9d9bd0 RDI: 0000000000000005
+[   41.529786] RBP: 00007fbc4bafb040 R08: 00007ffcf54013e0 R09: 000000000000000c
+[   41.530375] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+[   41.530977] R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007fbc4ca85d1b
+[   41.531573]  </TASK>
+
+Fixes: 5c578aedcb21d ("IPv6: convert addrconf hash list to RCU")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Jiri Benc <jbenc@redhat.com>
+Link: https://lore.kernel.org/r/8ab821e36073a4a406c50ec83c9e8dc586c539e4.1712585809.git.jbenc@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/addrconf.h | 4 ++++
+ net/ipv6/addrconf.c    | 7 ++++---
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index 61ebe723ee4d5..facb7a469efad 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -437,6 +437,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
+       refcount_inc(&ifp->refcnt);
+ }
++static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
++{
++      return refcount_inc_not_zero(&ifp->refcnt);
++}
+ /*
+  *    compute link-local solicited-node multicast address
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 6f57cbddeee63..d1806eee1687d 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -2058,9 +2058,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
+               if (ipv6_addr_equal(&ifp->addr, addr)) {
+                       if (!dev || ifp->idev->dev == dev ||
+                           !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+-                              result = ifp;
+-                              in6_ifa_hold(ifp);
+-                              break;
++                              if (in6_ifa_hold_safe(ifp)) {
++                                      result = ifp;
++                                      break;
++                              }
+                       }
+               }
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/mmc-omap-fix-broken-slot-switch-lookup.patch b/queue-6.6/mmc-omap-fix-broken-slot-switch-lookup.patch
new file mode 100644 (file)
index 0000000..507bc46
--- /dev/null
@@ -0,0 +1,58 @@
+From 00c42c16e0350f7bd8bbfedcc82e39d1cef941cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:14:37 +0200
+Subject: mmc: omap: fix broken slot switch lookup
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit d4debbcbffa45c3de5df0040af2eea74a9e794a3 ]
+
+The lookup is done before host->dev is initialized. It will always just
+fail silently, and the MMC behaviour is totally unpredictable as the switch
+is left in an undefined state. Fix that.
+
+Fixes: e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Message-ID: <20240223181439.1099750-4-aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/host/omap.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
+index 9fb8995b43a1c..aa40e1a9dc29e 100644
+--- a/drivers/mmc/host/omap.c
++++ b/drivers/mmc/host/omap.c
+@@ -1384,13 +1384,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
+       if (IS_ERR(host->virt_base))
+               return PTR_ERR(host->virt_base);
+-      host->slot_switch = gpiod_get_optional(host->dev, "switch",
+-                                             GPIOD_OUT_LOW);
+-      if (IS_ERR(host->slot_switch))
+-              return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+-                                   "error looking up slot switch GPIO\n");
+-
+-
+       INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
+       INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
+@@ -1409,6 +1402,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
+       host->dev = &pdev->dev;
+       platform_set_drvdata(pdev, host);
++      host->slot_switch = gpiod_get_optional(host->dev, "switch",
++                                             GPIOD_OUT_LOW);
++      if (IS_ERR(host->slot_switch))
++              return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
++                                   "error looking up slot switch GPIO\n");
++
+       host->id = pdev->id;
+       host->irq = irq;
+       host->phys_base = res->start;
+-- 
+2.43.0
+
diff --git a/queue-6.6/mmc-omap-fix-deferred-probe.patch b/queue-6.6/mmc-omap-fix-deferred-probe.patch
new file mode 100644 (file)
index 0000000..a8305fd
--- /dev/null
@@ -0,0 +1,66 @@
+From 0e81a265c69278bf799cd285d10a7435becc1088 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:14:38 +0200
+Subject: mmc: omap: fix deferred probe
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit f6862c7f156d04f81c38467e1c304b7e9517e810 ]
+
+After a deferred probe, GPIO descriptor lookup will fail with EBUSY. Fix by
+using managed descriptors.
+
+Fixes: e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Message-ID: <20240223181439.1099750-5-aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/host/omap.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
+index aa40e1a9dc29e..50408771ae01c 100644
+--- a/drivers/mmc/host/omap.c
++++ b/drivers/mmc/host/omap.c
+@@ -1259,18 +1259,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
+       slot->pdata = &host->pdata->slots[id];
+       /* Check for some optional GPIO controls */
+-      slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
+-                                           id, GPIOD_OUT_LOW);
++      slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
++                                                id, GPIOD_OUT_LOW);
+       if (IS_ERR(slot->vsd))
+               return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
+                                    "error looking up VSD GPIO\n");
+-      slot->vio = gpiod_get_index_optional(host->dev, "vio",
+-                                           id, GPIOD_OUT_LOW);
++      slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
++                                                id, GPIOD_OUT_LOW);
+       if (IS_ERR(slot->vio))
+               return dev_err_probe(host->dev, PTR_ERR(slot->vio),
+                                    "error looking up VIO GPIO\n");
+-      slot->cover = gpiod_get_index_optional(host->dev, "cover",
+-                                              id, GPIOD_IN);
++      slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
++                                                  id, GPIOD_IN);
+       if (IS_ERR(slot->cover))
+               return dev_err_probe(host->dev, PTR_ERR(slot->cover),
+                                    "error looking up cover switch GPIO\n");
+@@ -1402,8 +1402,8 @@ static int mmc_omap_probe(struct platform_device *pdev)
+       host->dev = &pdev->dev;
+       platform_set_drvdata(pdev, host);
+-      host->slot_switch = gpiod_get_optional(host->dev, "switch",
+-                                             GPIOD_OUT_LOW);
++      host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
++                                                  GPIOD_OUT_LOW);
+       if (IS_ERR(host->slot_switch))
+               return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+                                    "error looking up slot switch GPIO\n");
+-- 
+2.43.0
+
diff --git a/queue-6.6/mmc-omap-restore-original-power-up-down-steps.patch b/queue-6.6/mmc-omap-restore-original-power-up-down-steps.patch
new file mode 100644 (file)
index 0000000..7ea0a69
--- /dev/null
@@ -0,0 +1,65 @@
+From 4d5169901152f0ae6fd7cb665697c83b25a4d275 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Feb 2024 20:14:39 +0200
+Subject: mmc: omap: restore original power up/down steps
+
+From: Aaro Koskinen <aaro.koskinen@iki.fi>
+
+[ Upstream commit 894ad61b85d6ba8efd4274aa8719d9ff1c89ea54 ]
+
+Commit e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+moved Nokia N810 MMC power up/down from the board file into the MMC driver.
+
+The change removed some delays and changed the ordering without a valid reason.
+Restore power up/down to match the original code. This matters only on N810
+where the 2nd GPIO is in use. Other boards will see an additional delay but
+that should be a lesser concern than omitting delays altogether.
+
+Fixes: e519f0bb64ef ("ARM/mmc: Convert old mmci-omap to GPIO descriptors")
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Message-ID: <20240223181439.1099750-6-aaro.koskinen@iki.fi>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mmc/host/omap.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
+index 50408771ae01c..13fa8588e38c1 100644
+--- a/drivers/mmc/host/omap.c
++++ b/drivers/mmc/host/omap.c
+@@ -1119,10 +1119,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
+       host = slot->host;
+-      if (slot->vsd)
+-              gpiod_set_value(slot->vsd, power_on);
+-      if (slot->vio)
+-              gpiod_set_value(slot->vio, power_on);
++      if (power_on) {
++              if (slot->vsd) {
++                      gpiod_set_value(slot->vsd, power_on);
++                      msleep(1);
++              }
++              if (slot->vio) {
++                      gpiod_set_value(slot->vio, power_on);
++                      msleep(1);
++              }
++      } else {
++              if (slot->vio) {
++                      gpiod_set_value(slot->vio, power_on);
++                      msleep(50);
++              }
++              if (slot->vsd) {
++                      gpiod_set_value(slot->vsd, power_on);
++                      msleep(50);
++              }
++      }
+       if (slot->pdata->set_power != NULL)
+               slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-dsa-mt7530-trap-link-local-frames-regardless-of-.patch b/queue-6.6/net-dsa-mt7530-trap-link-local-frames-regardless-of-.patch
new file mode 100644 (file)
index 0000000..28e56be
--- /dev/null
@@ -0,0 +1,495 @@
+From 4f0b7e35459e8e960409002ae212c7b51afa154f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 18:01:14 +0300
+Subject: net: dsa: mt7530: trap link-local frames regardless of ST Port State
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Arınç ÜNAL <arinc.unal@arinc9.com>
+
+[ Upstream commit 17c560113231ddc20088553c7b499b289b664311 ]
+
+In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer
+(DLL) of the Open Systems Interconnection basic reference model (OSI/RM)
+are described; the medium access control (MAC) and logical link control
+(LLC) sublayers. The MAC sublayer is the one facing the physical layer.
+
+In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+of the Bridge, at least two Ports, and higher layer entities with at least
+a Spanning Tree Protocol Entity included.
+
+Each Bridge Port also functions as an end station and shall provide the MAC
+Service to an LLC Entity. Each instance of the MAC Service is provided to a
+distinct LLC Entity that supports protocol identification, multiplexing,
+and demultiplexing, for protocol data unit (PDU) transmission and reception
+by one or more higher layer entities.
+
+It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+Entity associated with each Bridge Port is modeled as being directly
+connected to the attached Local Area Network (LAN).
+
+On the switch with CPU port architecture, CPU port functions as Management
+Port, and the Management Port functionality is provided by software which
+functions as an end station. Software is connected to an IEEE 802 LAN that
+is wholly contained within the system that incorporates the Bridge.
+Software provides access to the LLC Entity associated with each Bridge Port
+by the value of the source port field on the special tag on the frame
+received by software.
+
+We call frames that carry control information to determine the active
+topology and current extent of each Virtual Local Area Network (VLAN),
+i.e., spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN
+Registration Protocol Data Units (MVRPDUs), and frames from other link
+constrained protocols, such as Extensible Authentication Protocol over LAN
+(EAPOL) and Link Layer Discovery Protocol (LLDP), link-local frames. They
+are not forwarded by a Bridge. Permanently configured entries in the
+filtering database (FDB) ensure that such frames are discarded by the
+Forwarding Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in
+detail:
+
+Each of the reserved MAC addresses specified in Table 8-1
+(01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+permanently configured in the FDB in C-VLAN components and ERs.
+
+Each of the reserved MAC addresses specified in Table 8-2
+(01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+configured in the FDB in S-VLAN components.
+
+Each of the reserved MAC addresses specified in Table 8-3
+(01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB
+in TPMR components.
+
+The FDB entries for reserved MAC addresses shall specify filtering for all
+Bridge Ports and all VIDs. Management shall not provide the capability to
+modify or remove entries for reserved MAC addresses.
+
+The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+propagation of PDUs within a Bridged Network, as follows:
+
+  The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that
+  no conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+  component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+  PDUs transmitted using this destination address, or any other addresses
+  that appear in Table 8-1, Table 8-2, and Table 8-3
+  (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+  therefore travel no further than those stations that can be reached via a
+  single individual LAN from the originating station.
+
+  The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+  address that no conformant S-VLAN component, C-VLAN component, or MAC
+  Bridge can forward; however, this address is relayed by a TPMR component.
+  PDUs using this destination address, or any of the other addresses that
+  appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+  (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed
+  by any TPMRs but will propagate no further than the nearest S-VLAN
+  component, C-VLAN component, or MAC Bridge.
+
+  The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an
+  address that no conformant C-VLAN component, MAC Bridge can forward;
+  however, it is relayed by TPMR components and S-VLAN components. PDUs
+  using this destination address, or any of the other addresses that appear
+  in Table 8-1 but not in either Table 8-2 or Table 8-3
+  (01-80-C2-00-00-[00,0B,0C,0D,0F]), will be relayed by TPMR components and
+  S-VLAN components but will propagate no further than the nearest C-VLAN
+  component or MAC Bridge.
+
+Because the LLC Entity associated with each Bridge Port is provided via CPU
+port, we must not filter these frames but forward them to CPU port.
+
+In a Bridge, the transmission Port is majorly decided by ingress and egress
+rules, FDB, and spanning tree Port State functions of the Forwarding
+Process. For link-local frames, only CPU port should be designated as
+destination port in the FDB, and the other functions of the Forwarding
+Process must not interfere with the decision of the transmission Port. We
+call this process trapping frames to CPU port.
+
+Therefore, on the switch with CPU port architecture, link-local frames must
+be trapped to CPU port, and certain link-local frames received by a Port of
+a Bridge comprising a TPMR component or an S-VLAN component must be
+excluded from it.
+
+A Bridge of the switch with CPU port architecture cannot comprise a
+Two-Port MAC Relay (TPMR) component as a TPMR component supports only a
+subset of the functionality of a MAC Bridge. A Bridge comprising two Ports
+(Management Port doesn't count) of this architecture will either function
+as a standard MAC Bridge or a standard VLAN Bridge.
+
+Therefore, a Bridge of this architecture can only comprise S-VLAN
+components, C-VLAN components, or MAC Bridge components. Since there's no
+TPMR component, we don't need to relay PDUs using the destination addresses
+specified on the Nearest non-TPMR section, and the portion of the
+Nearest Customer Bridge section where they must be relayed by TPMR
+components.
+
+One option to trap link-local frames to CPU port is to add static FDB
+entries with CPU port designated as destination port. However, because
+Independent VLAN Learning (IVL) is being used on every VID, each entry only
+applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+Bridge component or a C-VLAN component, there would have to be 16 times
+4096 entries. This switch intellectual property can only hold a maximum of
+2048 entries. Using this option, there also isn't a mechanism to prevent
+link-local frames from being discarded when the spanning tree Port State of
+the reception Port is discarding.
+
+The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+registers. Whilst this applies to every VID, it doesn't contain all of the
+reserved MAC addresses without affecting the remaining Standard Group MAC
+Addresses. The REV_UN frame tag utilised using the RGAC4 register covers
+the remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+The latter option provides better but not complete conformance.
+
+This switch intellectual property also does not provide a mechanism to trap
+link-local frames with specific destination addresses to CPU port by
+Bridge, to conform to the filtering rules for the distinct Bridge
+components.
+
+Therefore, regardless of the type of the Bridge component, link-local
+frames with these destination addresses will be trapped to CPU port:
+
+01-80-C2-00-00-[00,01,02,03,0E]
+
+In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+
+  Link-local frames with these destination addresses won't be trapped to
+  CPU port which won't conform to IEEE Std 802.1Q-2022:
+
+  01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+
+In a Bridge comprising an S-VLAN component:
+
+  Link-local frames with these destination addresses will be trapped to CPU
+  port which won't conform to IEEE Std 802.1Q-2022:
+
+  01-80-C2-00-00-00
+
+  Link-local frames with these destination addresses won't be trapped to
+  CPU port which won't conform to IEEE Std 802.1Q-2022:
+
+  01-80-C2-00-00-[04,05,06,07,08,09,0A]
+
+Currently on this switch intellectual property, if the spanning tree Port
+State of the reception Port is discarding, link-local frames will be
+discarded.
+
+To trap link-local frames regardless of the spanning tree Port State, make
+the switch regard them as Bridge Protocol Data Units (BPDUs). This switch
+intellectual property only lets the frames regarded as BPDUs bypass the
+spanning tree Port State function of the Forwarding Process.
+
+With this change, the only remaining interference is the ingress rules.
+When the reception Port has no PVID assigned on software, VLAN-untagged
+frames won't be allowed in. There doesn't seem to be a mechanism on the
+switch intellectual property to have link-local frames bypass this function
+of the Forwarding Process.
+
+Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch")
+Reviewed-by: Daniel Golle <daniel@makrotopia.org>
+Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
+Link: https://lore.kernel.org/r/20240409-b4-for-net-mt7530-fix-link-local-when-stp-discarding-v2-1-07b1150164ac@arinc9.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mt7530.c | 229 +++++++++++++++++++++++++++++++++------
+ drivers/net/dsa/mt7530.h |   5 +
+ 2 files changed, 200 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
+index f529bc6f56801..88f081672f6fb 100644
+--- a/drivers/net/dsa/mt7530.c
++++ b/drivers/net/dsa/mt7530.c
+@@ -999,20 +999,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
+       mutex_unlock(&priv->reg_mutex);
+ }
+-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
+- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
+- * must only be propagated to C-VLAN and MAC Bridge components. That means
+- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
+- * these frames are supposed to be processed by the CPU (software). So we make
+- * the switch only forward them to the CPU port. And if received from a CPU
+- * port, forward to a single port. The software is responsible of making the
+- * switch conform to the latter by setting a single port as destination port on
+- * the special tag.
++/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
++ * of the Open Systems Interconnection basic reference model (OSI/RM) are
++ * described; the medium access control (MAC) and logical link control (LLC)
++ * sublayers. The MAC sublayer is the one facing the physical layer.
+  *
+- * This switch intellectual property cannot conform to this part of the standard
+- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
+- * DAs, it also includes :22-FF which the scope of propagation is not supposed
+- * to be restricted for these MAC DAs.
++ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
++ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
++ * of the Bridge, at least two Ports, and higher layer entities with at least a
++ * Spanning Tree Protocol Entity included.
++ *
++ * Each Bridge Port also functions as an end station and shall provide the MAC
++ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
++ * distinct LLC Entity that supports protocol identification, multiplexing, and
++ * demultiplexing, for protocol data unit (PDU) transmission and reception by
++ * one or more higher layer entities.
++ *
++ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
++ * Entity associated with each Bridge Port is modeled as being directly
++ * connected to the attached Local Area Network (LAN).
++ *
++ * On the switch with CPU port architecture, CPU port functions as Management
++ * Port, and the Management Port functionality is provided by software which
++ * functions as an end station. Software is connected to an IEEE 802 LAN that is
++ * wholly contained within the system that incorporates the Bridge. Software
++ * provides access to the LLC Entity associated with each Bridge Port by the
++ * value of the source port field on the special tag on the frame received by
++ * software.
++ *
++ * We call frames that carry control information to determine the active
++ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
++ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
++ * Protocol Data Units (MVRPDUs), and frames from other link constrained
++ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
++ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
++ * forwarded by a Bridge. Permanently configured entries in the filtering
++ * database (FDB) ensure that such frames are discarded by the Forwarding
++ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
++ *
++ * Each of the reserved MAC addresses specified in Table 8-1
++ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
++ * permanently configured in the FDB in C-VLAN components and ERs.
++ *
++ * Each of the reserved MAC addresses specified in Table 8-2
++ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
++ * configured in the FDB in S-VLAN components.
++ *
++ * Each of the reserved MAC addresses specified in Table 8-3
++ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
++ * TPMR components.
++ *
++ * The FDB entries for reserved MAC addresses shall specify filtering for all
++ * Bridge Ports and all VIDs. Management shall not provide the capability to
++ * modify or remove entries for reserved MAC addresses.
++ *
++ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
++ * propagation of PDUs within a Bridged Network, as follows:
++ *
++ *   The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
++ *   conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
++ *   component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
++ *   PDUs transmitted using this destination address, or any other addresses
++ *   that appear in Table 8-1, Table 8-2, and Table 8-3
++ *   (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
++ *   therefore travel no further than those stations that can be reached via a
++ *   single individual LAN from the originating station.
++ *
++ *   The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
++ *   address that no conformant S-VLAN component, C-VLAN component, or MAC
++ *   Bridge can forward; however, this address is relayed by a TPMR component.
++ *   PDUs using this destination address, or any of the other addresses that
++ *   appear in both Table 8-1 and Table 8-2 but not in Table 8-3
++ *   (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
++ *   any TPMRs but will propagate no further than the nearest S-VLAN component,
++ *   C-VLAN component, or MAC Bridge.
++ *
++ *   The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
++ *   that no conformant C-VLAN component, MAC Bridge can forward; however, it is
++ *   relayed by TPMR components and S-VLAN components. PDUs using this
++ *   destination address, or any of the other addresses that appear in Table 8-1
++ *   but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
++ *   will be relayed by TPMR components and S-VLAN components but will propagate
++ *   no further than the nearest C-VLAN component or MAC Bridge.
++ *
++ * Because the LLC Entity associated with each Bridge Port is provided via CPU
++ * port, we must not filter these frames but forward them to CPU port.
++ *
++ * In a Bridge, the transmission Port is majorly decided by ingress and egress
++ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
++ * For link-local frames, only CPU port should be designated as destination port
++ * in the FDB, and the other functions of the Forwarding Process must not
++ * interfere with the decision of the transmission Port. We call this process
++ * trapping frames to CPU port.
++ *
++ * Therefore, on the switch with CPU port architecture, link-local frames must
++ * be trapped to CPU port, and certain link-local frames received by a Port of a
++ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
++ * from it.
++ *
++ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
++ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
++ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
++ * doesn't count) of this architecture will either function as a standard MAC
++ * Bridge or a standard VLAN Bridge.
++ *
++ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
++ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
++ * we don't need to relay PDUs using the destination addresses specified on the
++ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
++ * section where they must be relayed by TPMR components.
++ *
++ * One option to trap link-local frames to CPU port is to add static FDB entries
++ * with CPU port designated as destination port. However, because that
++ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
++ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
++ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
++ * entries. This switch intellectual property can only hold a maximum of 2048
++ * entries. Using this option, there also isn't a mechanism to prevent
++ * link-local frames from being discarded when the spanning tree Port State of
++ * the reception Port is discarding.
++ *
++ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
++ * registers. Whilst this applies to every VID, it doesn't contain all of the
++ * reserved MAC addresses without affecting the remaining Standard Group MAC
++ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
++ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
++ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
++ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
++ * The latter option provides better but not complete conformance.
++ *
++ * This switch intellectual property also does not provide a mechanism to trap
++ * link-local frames with specific destination addresses to CPU port by Bridge,
++ * to conform to the filtering rules for the distinct Bridge components.
++ *
++ * Therefore, regardless of the type of the Bridge component, link-local frames
++ * with these destination addresses will be trapped to CPU port:
++ *
++ * 01-80-C2-00-00-[00,01,02,03,0E]
++ *
++ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
++ *
++ *   Link-local frames with these destination addresses won't be trapped to CPU
++ *   port which won't conform to IEEE Std 802.1Q-2022:
++ *
++ *   01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
++ *
++ * In a Bridge comprising an S-VLAN component:
++ *
++ *   Link-local frames with these destination addresses will be trapped to CPU
++ *   port which won't conform to IEEE Std 802.1Q-2022:
++ *
++ *   01-80-C2-00-00-00
++ *
++ *   Link-local frames with these destination addresses won't be trapped to CPU
++ *   port which won't conform to IEEE Std 802.1Q-2022:
++ *
++ *   01-80-C2-00-00-[04,05,06,07,08,09,0A]
++ *
++ * To trap link-local frames to CPU port as conformant as this switch
++ * intellectual property can allow, link-local frames are made to be regarded as
++ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
++ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
++ * State function of the Forwarding Process.
++ *
++ * The only remaining interference is the ingress rules. When the reception Port
++ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
++ * There doesn't seem to be a mechanism on the switch intellectual property to
++ * have link-local frames bypass this function of the Forwarding Process.
+  */
+ static void
+ mt753x_trap_frames(struct mt7530_priv *priv)
+@@ -1020,35 +1173,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
+       /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
+        * VLAN-untagged.
+        */
+-      mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
+-                 MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+-                 MT753X_BPDU_PORT_FW_MASK,
+-                 MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+-                 MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_BPDU_CPU_ONLY);
++      mt7530_rmw(priv, MT753X_BPC,
++                 MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
++                         MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
++                         MT753X_BPDU_PORT_FW_MASK,
++                 MT753X_PAE_BPDU_FR |
++                         MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
++                         MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_BPDU_CPU_ONLY);
+       /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
+        * them VLAN-untagged.
+        */
+-      mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
+-                 MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
+-                 MT753X_R01_PORT_FW_MASK,
+-                 MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+-                 MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_BPDU_CPU_ONLY);
++      mt7530_rmw(priv, MT753X_RGAC1,
++                 MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
++                         MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
++                         MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
++                 MT753X_R02_BPDU_FR |
++                         MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
++                         MT753X_R01_BPDU_FR |
++                         MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_BPDU_CPU_ONLY);
+       /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
+        * them VLAN-untagged.
+        */
+-      mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
+-                 MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
+-                 MT753X_R03_PORT_FW_MASK,
+-                 MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+-                 MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+-                 MT753X_BPDU_CPU_ONLY);
++      mt7530_rmw(priv, MT753X_RGAC2,
++                 MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
++                         MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
++                         MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
++                 MT753X_R0E_BPDU_FR |
++                         MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
++                         MT753X_R03_BPDU_FR |
++                         MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
++                         MT753X_BPDU_CPU_ONLY);
+ }
+ static int
+diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
+index 75bc9043c8c0a..ddefeb69afda1 100644
+--- a/drivers/net/dsa/mt7530.h
++++ b/drivers/net/dsa/mt7530.h
+@@ -65,6 +65,7 @@ enum mt753x_id {
+ /* Registers for BPDU and PAE frame control*/
+ #define MT753X_BPC                    0x24
++#define  MT753X_PAE_BPDU_FR           BIT(25)
+ #define  MT753X_PAE_EG_TAG_MASK               GENMASK(24, 22)
+ #define  MT753X_PAE_EG_TAG(x)         FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
+ #define  MT753X_PAE_PORT_FW_MASK      GENMASK(18, 16)
+@@ -75,20 +76,24 @@ enum mt753x_id {
+ /* Register for :01 and :02 MAC DA frame control */
+ #define MT753X_RGAC1                  0x28
++#define  MT753X_R02_BPDU_FR           BIT(25)
+ #define  MT753X_R02_EG_TAG_MASK               GENMASK(24, 22)
+ #define  MT753X_R02_EG_TAG(x)         FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
+ #define  MT753X_R02_PORT_FW_MASK      GENMASK(18, 16)
+ #define  MT753X_R02_PORT_FW(x)                FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
++#define  MT753X_R01_BPDU_FR           BIT(9)
+ #define  MT753X_R01_EG_TAG_MASK               GENMASK(8, 6)
+ #define  MT753X_R01_EG_TAG(x)         FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
+ #define  MT753X_R01_PORT_FW_MASK      GENMASK(2, 0)
+ /* Register for :03 and :0E MAC DA frame control */
+ #define MT753X_RGAC2                  0x2c
++#define  MT753X_R0E_BPDU_FR           BIT(25)
+ #define  MT753X_R0E_EG_TAG_MASK               GENMASK(24, 22)
+ #define  MT753X_R0E_EG_TAG(x)         FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
+ #define  MT753X_R0E_PORT_FW_MASK      GENMASK(18, 16)
+ #define  MT753X_R0E_PORT_FW(x)                FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
++#define  MT753X_R03_BPDU_FR           BIT(9)
+ #define  MT753X_R03_EG_TAG_MASK               GENMASK(8, 6)
+ #define  MT753X_R03_EG_TAG(x)         FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
+ #define  MT753X_R03_PORT_FW_MASK      GENMASK(2, 0)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-fix-incorrect-descriptor-free-behavior.patch b/queue-6.6/net-ena-fix-incorrect-descriptor-free-behavior.patch
new file mode 100644 (file)
index 0000000..35cad99
--- /dev/null
@@ -0,0 +1,72 @@
+From b005f5abd617e090d4e2677190cb263fdfc36d29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Apr 2024 09:13:57 +0000
+Subject: net: ena: Fix incorrect descriptor free behavior
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit bf02d9fe00632d22fa91d34749c7aacf397b6cde ]
+
+ENA has two types of TX queues:
+- queues which only process TX packets arriving from the network stack
+- queues which only process TX packets forwarded to it by XDP_REDIRECT
+  or XDP_TX instructions
+
+The ena_free_tx_bufs() function cycles through all descriptors in a TX
+queue and unmaps and frees every descriptor that hasn't been acknowledged
+yet by the device (uncompleted TX transactions).
+The function assumes that the processed TX queue is necessarily from
+the first category listed above and ends up using napi_consume_skb()
+for descriptors belonging to an XDP specific queue.
+
+This patch solves a bug in which, in case of a VF reset, the
+descriptors aren't freed correctly, leading to crashes.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 5178eb089eabe..fd34f01a60b5c 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1205,8 +1205,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+ {
+       bool print_once = true;
++      bool is_xdp_ring;
+       u32 i;
++      is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
++
+       for (i = 0; i < tx_ring->ring_size; i++) {
+               struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
+@@ -1226,10 +1229,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+               ena_unmap_tx_buff(tx_ring, tx_info);
+-              dev_kfree_skb_any(tx_info->skb);
++              if (is_xdp_ring)
++                      xdp_return_frame(tx_info->xdpf);
++              else
++                      dev_kfree_skb_any(tx_info->skb);
+       }
+-      netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+-                                                tx_ring->qid));
++
++      if (!is_xdp_ring)
++              netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
++                                                        tx_ring->qid));
+ }
+ static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-fix-potential-sign-extension-issue.patch b/queue-6.6/net-ena-fix-potential-sign-extension-issue.patch
new file mode 100644 (file)
index 0000000..f6fa260
--- /dev/null
@@ -0,0 +1,66 @@
+From bdc28f0c5ca5f0ff43d41fe00ba9dd5a1e8f43ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Apr 2024 09:13:55 +0000
+Subject: net: ena: Fix potential sign extension issue
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 713a85195aad25d8a26786a37b674e3e5ec09e3c ]
+
+Small unsigned types are promoted to larger signed types in
+the case of multiplication, the result of which may overflow.
+In case the result of such a multiplication has its MSB
+turned on, it will be sign extended with '1's.
+This changes the multiplication result.
+
+Code example of the phenomenon:
+-------------------------------
+u16 x, y;
+size_t z1, z2;
+
+x = y = 0xffff;
+printk("x=%x y=%x\n",x,y);
+
+z1 = x*y;
+z2 = (size_t)x*y;
+
+printk("z1=%lx z2=%lx\n", z1, z2);
+
+Output:
+-------
+x=ffff y=ffff
+z1=fffffffffffe0001 z2=fffe0001
+
+The expected result of ffff*ffff is fffe0001, and without the
+explicit casting to avoid the unwanted sign extension we got
+fffffffffffe0001.
+
+This commit adds an explicit casting to avoid the sign extension
+issue.
+
+Fixes: 689b2bdaaa14 ("net: ena: add functions for handling Low Latency Queues in ena_com")
+Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_com.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
+index 633b321d7fdd9..4db689372980e 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_com.c
+@@ -362,7 +362,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+                       ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
+               io_sq->bounce_buf_ctrl.next_to_use = 0;
+-              size = io_sq->bounce_buf_ctrl.buffer_size *
++              size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
+                       io_sq->bounce_buf_ctrl.buffers_num;
+               dev_node = dev_to_node(ena_dev->dmadev);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-move-xdp-code-to-its-new-files.patch b/queue-6.6/net-ena-move-xdp-code-to-its-new-files.patch
new file mode 100644 (file)
index 0000000..b655bbd
--- /dev/null
@@ -0,0 +1,1572 @@
+From 22c8c4c4c64db852f2d51a3f500c2966da39cbde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jan 2024 19:08:45 +0000
+Subject: net: ena: Move XDP code to its new files
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit d000574d02870710c62751148cbfe22993222b98 ]
+
+The XDP system has a very large footprint in the driver's overall code,
+which makes the whole driver's code much harder to read.
+
+Moving XDP code to dedicated files.
+
+This patch doesn't make any changes to the code itself and only
+cut-pastes the code into ena_xdp.c and ena_xdp.h files so the change
+is purely cosmetic.
+
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20240101190855.18739-2-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 36a1ca01f045 ("net: ena: Set tx_info->xdpf value to NULL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../device_drivers/ethernet/amazon/ena.rst    |   1 +
+ drivers/net/ethernet/amazon/ena/Makefile      |   2 +-
+ drivers/net/ethernet/amazon/ena/ena_ethtool.c |   1 +
+ drivers/net/ethernet/amazon/ena/ena_netdev.c  | 634 +-----------------
+ drivers/net/ethernet/amazon/ena/ena_netdev.h  |  82 +--
+ drivers/net/ethernet/amazon/ena/ena_xdp.c     | 465 +++++++++++++
+ drivers/net/ethernet/amazon/ena/ena_xdp.h     | 152 +++++
+ 7 files changed, 680 insertions(+), 657 deletions(-)
+ create mode 100644 drivers/net/ethernet/amazon/ena/ena_xdp.c
+ create mode 100644 drivers/net/ethernet/amazon/ena/ena_xdp.h
+
+diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+index 5eaa3ab6c73e7..b842bcb14255b 100644
+--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
++++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+@@ -54,6 +54,7 @@ ena_common_defs.h   Common definitions for ena_com layer.
+ ena_regs_defs.h     Definition of ENA PCI memory-mapped (MMIO) registers.
+ ena_netdev.[ch]     Main Linux kernel driver.
+ ena_ethtool.c       ethtool callbacks.
++ena_xdp.[ch]        XDP files
+ ena_pci_id_tbl.h    Supported device IDs.
+ =================   ======================================================
+diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
+index f1f752a8f7bb4..6ab615365172e 100644
+--- a/drivers/net/ethernet/amazon/ena/Makefile
++++ b/drivers/net/ethernet/amazon/ena/Makefile
+@@ -5,4 +5,4 @@
+ obj-$(CONFIG_ENA_ETHERNET) += ena.o
+-ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
++ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o ena_xdp.o
+diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+index d671df4b76bc7..d901877544445 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+@@ -7,6 +7,7 @@
+ #include <linux/pci.h>
+ #include "ena_netdev.h"
++#include "ena_xdp.h"
+ struct ena_stats {
+       char name[ETH_GSTRING_LEN];
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index fd34f01a60b5c..1e74386829c42 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -19,8 +19,8 @@
+ #include <net/ip.h>
+ #include "ena_netdev.h"
+-#include <linux/bpf_trace.h>
+ #include "ena_pci_id_tbl.h"
++#include "ena_xdp.h"
+ MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+ MODULE_DESCRIPTION(DEVICE_NAME);
+@@ -45,53 +45,6 @@ static void check_for_admin_com_state(struct ena_adapter *adapter);
+ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
+ static int ena_restore_device(struct ena_adapter *adapter);
+-static void ena_init_io_rings(struct ena_adapter *adapter,
+-                            int first_index, int count);
+-static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
+-                                 int count);
+-static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
+-                                int count);
+-static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
+-static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+-                                         int first_index,
+-                                         int count);
+-static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
+-static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
+-static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
+-static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
+-static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+-static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+-                                    int first_index, int count);
+-static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+-                                   int first_index, int count);
+-static int ena_up(struct ena_adapter *adapter);
+-static void ena_down(struct ena_adapter *adapter);
+-static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+-                               struct ena_ring *rx_ring);
+-static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+-                                    struct ena_ring *rx_ring);
+-static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+-                            struct ena_tx_buffer *tx_info);
+-static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+-                                          int first_index, int count);
+-static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+-                                                int first_index, int count);
+-
+-/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
+-static void ena_increase_stat(u64 *statp, u64 cnt,
+-                            struct u64_stats_sync *syncp)
+-{
+-      u64_stats_update_begin(syncp);
+-      (*statp) += cnt;
+-      u64_stats_update_end(syncp);
+-}
+-
+-static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
+-{
+-      ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+-      ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
+-}
+-
+ static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
+ {
+       struct ena_adapter *adapter = netdev_priv(dev);
+@@ -135,12 +88,12 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu)
+       return ret;
+ }
+-static int ena_xmit_common(struct net_device *dev,
+-                         struct ena_ring *ring,
+-                         struct ena_tx_buffer *tx_info,
+-                         struct ena_com_tx_ctx *ena_tx_ctx,
+-                         u16 next_to_use,
+-                         u32 bytes)
++int ena_xmit_common(struct net_device *dev,
++                  struct ena_ring *ring,
++                  struct ena_tx_buffer *tx_info,
++                  struct ena_com_tx_ctx *ena_tx_ctx,
++                  u16 next_to_use,
++                  u32 bytes)
+ {
+       struct ena_adapter *adapter = netdev_priv(dev);
+       int rc, nb_hw_desc;
+@@ -186,467 +139,6 @@ static int ena_xmit_common(struct net_device *dev,
+       return 0;
+ }
+-/* This is the XDP napi callback. XDP queues use a separate napi callback
+- * than Rx/Tx queues.
+- */
+-static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+-{
+-      struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+-      u32 xdp_work_done, xdp_budget;
+-      struct ena_ring *xdp_ring;
+-      int napi_comp_call = 0;
+-      int ret;
+-
+-      xdp_ring = ena_napi->xdp_ring;
+-
+-      xdp_budget = budget;
+-
+-      if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
+-          test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
+-              napi_complete_done(napi, 0);
+-              return 0;
+-      }
+-
+-      xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
+-
+-      /* If the device is about to reset or down, avoid unmask
+-       * the interrupt and return 0 so NAPI won't reschedule
+-       */
+-      if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
+-              napi_complete_done(napi, 0);
+-              ret = 0;
+-      } else if (xdp_budget > xdp_work_done) {
+-              napi_comp_call = 1;
+-              if (napi_complete_done(napi, xdp_work_done))
+-                      ena_unmask_interrupt(xdp_ring, NULL);
+-              ena_update_ring_numa_node(xdp_ring, NULL);
+-              ret = xdp_work_done;
+-      } else {
+-              ret = xdp_budget;
+-      }
+-
+-      u64_stats_update_begin(&xdp_ring->syncp);
+-      xdp_ring->tx_stats.napi_comp += napi_comp_call;
+-      xdp_ring->tx_stats.tx_poll++;
+-      u64_stats_update_end(&xdp_ring->syncp);
+-      xdp_ring->tx_stats.last_napi_jiffies = jiffies;
+-
+-      return ret;
+-}
+-
+-static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+-                              struct ena_tx_buffer *tx_info,
+-                              struct xdp_frame *xdpf,
+-                              struct ena_com_tx_ctx *ena_tx_ctx)
+-{
+-      struct ena_adapter *adapter = xdp_ring->adapter;
+-      struct ena_com_buf *ena_buf;
+-      int push_len = 0;
+-      dma_addr_t dma;
+-      void *data;
+-      u32 size;
+-
+-      tx_info->xdpf = xdpf;
+-      data = tx_info->xdpf->data;
+-      size = tx_info->xdpf->len;
+-
+-      if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+-              /* Designate part of the packet for LLQ */
+-              push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+-
+-              ena_tx_ctx->push_header = data;
+-
+-              size -= push_len;
+-              data += push_len;
+-      }
+-
+-      ena_tx_ctx->header_len = push_len;
+-
+-      if (size > 0) {
+-              dma = dma_map_single(xdp_ring->dev,
+-                                   data,
+-                                   size,
+-                                   DMA_TO_DEVICE);
+-              if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+-                      goto error_report_dma_error;
+-
+-              tx_info->map_linear_data = 0;
+-
+-              ena_buf = tx_info->bufs;
+-              ena_buf->paddr = dma;
+-              ena_buf->len = size;
+-
+-              ena_tx_ctx->ena_bufs = ena_buf;
+-              ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+-      }
+-
+-      return 0;
+-
+-error_report_dma_error:
+-      ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
+-                        &xdp_ring->syncp);
+-      netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+-
+-      return -EINVAL;
+-}
+-
+-static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+-                            struct net_device *dev,
+-                            struct xdp_frame *xdpf,
+-                            int flags)
+-{
+-      struct ena_com_tx_ctx ena_tx_ctx = {};
+-      struct ena_tx_buffer *tx_info;
+-      u16 next_to_use, req_id;
+-      int rc;
+-
+-      next_to_use = xdp_ring->next_to_use;
+-      req_id = xdp_ring->free_ids[next_to_use];
+-      tx_info = &xdp_ring->tx_buffer_info[req_id];
+-      tx_info->num_of_bufs = 0;
+-
+-      rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
+-      if (unlikely(rc))
+-              return rc;
+-
+-      ena_tx_ctx.req_id = req_id;
+-
+-      rc = ena_xmit_common(dev,
+-                           xdp_ring,
+-                           tx_info,
+-                           &ena_tx_ctx,
+-                           next_to_use,
+-                           xdpf->len);
+-      if (rc)
+-              goto error_unmap_dma;
+-
+-      /* trigger the dma engine. ena_ring_tx_doorbell()
+-       * calls a memory barrier inside it.
+-       */
+-      if (flags & XDP_XMIT_FLUSH)
+-              ena_ring_tx_doorbell(xdp_ring);
+-
+-      return rc;
+-
+-error_unmap_dma:
+-      ena_unmap_tx_buff(xdp_ring, tx_info);
+-      tx_info->xdpf = NULL;
+-      return rc;
+-}
+-
+-static int ena_xdp_xmit(struct net_device *dev, int n,
+-                      struct xdp_frame **frames, u32 flags)
+-{
+-      struct ena_adapter *adapter = netdev_priv(dev);
+-      struct ena_ring *xdp_ring;
+-      int qid, i, nxmit = 0;
+-
+-      if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+-              return -EINVAL;
+-
+-      if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+-              return -ENETDOWN;
+-
+-      /* We assume that all rings have the same XDP program */
+-      if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
+-              return -ENXIO;
+-
+-      qid = smp_processor_id() % adapter->xdp_num_queues;
+-      qid += adapter->xdp_first_ring;
+-      xdp_ring = &adapter->tx_ring[qid];
+-
+-      /* Other CPU ids might try to send thorugh this queue */
+-      spin_lock(&xdp_ring->xdp_tx_lock);
+-
+-      for (i = 0; i < n; i++) {
+-              if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
+-                      break;
+-              nxmit++;
+-      }
+-
+-      /* Ring doorbell to make device aware of the packets */
+-      if (flags & XDP_XMIT_FLUSH)
+-              ena_ring_tx_doorbell(xdp_ring);
+-
+-      spin_unlock(&xdp_ring->xdp_tx_lock);
+-
+-      /* Return number of packets sent */
+-      return nxmit;
+-}
+-
+-static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+-{
+-      u32 verdict = ENA_XDP_PASS;
+-      struct bpf_prog *xdp_prog;
+-      struct ena_ring *xdp_ring;
+-      struct xdp_frame *xdpf;
+-      u64 *xdp_stat;
+-
+-      xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
+-
+-      if (!xdp_prog)
+-              goto out;
+-
+-      verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+-
+-      switch (verdict) {
+-      case XDP_TX:
+-              xdpf = xdp_convert_buff_to_frame(xdp);
+-              if (unlikely(!xdpf)) {
+-                      trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+-                      xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+-                      verdict = ENA_XDP_DROP;
+-                      break;
+-              }
+-
+-              /* Find xmit queue */
+-              xdp_ring = rx_ring->xdp_ring;
+-
+-              /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
+-              spin_lock(&xdp_ring->xdp_tx_lock);
+-
+-              if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
+-                                     XDP_XMIT_FLUSH))
+-                      xdp_return_frame(xdpf);
+-
+-              spin_unlock(&xdp_ring->xdp_tx_lock);
+-              xdp_stat = &rx_ring->rx_stats.xdp_tx;
+-              verdict = ENA_XDP_TX;
+-              break;
+-      case XDP_REDIRECT:
+-              if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
+-                      xdp_stat = &rx_ring->rx_stats.xdp_redirect;
+-                      verdict = ENA_XDP_REDIRECT;
+-                      break;
+-              }
+-              trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+-              xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+-              verdict = ENA_XDP_DROP;
+-              break;
+-      case XDP_ABORTED:
+-              trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+-              xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+-              verdict = ENA_XDP_DROP;
+-              break;
+-      case XDP_DROP:
+-              xdp_stat = &rx_ring->rx_stats.xdp_drop;
+-              verdict = ENA_XDP_DROP;
+-              break;
+-      case XDP_PASS:
+-              xdp_stat = &rx_ring->rx_stats.xdp_pass;
+-              verdict = ENA_XDP_PASS;
+-              break;
+-      default:
+-              bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
+-              xdp_stat = &rx_ring->rx_stats.xdp_invalid;
+-              verdict = ENA_XDP_DROP;
+-      }
+-
+-      ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
+-out:
+-      return verdict;
+-}
+-
+-static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+-{
+-      adapter->xdp_first_ring = adapter->num_io_queues;
+-      adapter->xdp_num_queues = adapter->num_io_queues;
+-
+-      ena_init_io_rings(adapter,
+-                        adapter->xdp_first_ring,
+-                        adapter->xdp_num_queues);
+-}
+-
+-static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+-{
+-      u32 xdp_first_ring = adapter->xdp_first_ring;
+-      u32 xdp_num_queues = adapter->xdp_num_queues;
+-      int rc = 0;
+-
+-      rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+-      if (rc)
+-              goto setup_err;
+-
+-      rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
+-      if (rc)
+-              goto create_err;
+-
+-      return 0;
+-
+-create_err:
+-      ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+-setup_err:
+-      return rc;
+-}
+-
+-/* Provides a way for both kernel and bpf-prog to know
+- * more about the RX-queue a given XDP frame arrived on.
+- */
+-static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
+-{
+-      int rc;
+-
+-      rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
+-
+-      if (rc) {
+-              netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+-                        "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
+-                        rx_ring->qid, rc);
+-              goto err;
+-      }
+-
+-      rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+-                                      NULL);
+-
+-      if (rc) {
+-              netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+-                        "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
+-                        rx_ring->qid, rc);
+-              xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+-      }
+-
+-err:
+-      return rc;
+-}
+-
+-static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
+-{
+-      xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
+-      xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+-}
+-
+-static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+-                                               struct bpf_prog *prog,
+-                                               int first, int count)
+-{
+-      struct bpf_prog *old_bpf_prog;
+-      struct ena_ring *rx_ring;
+-      int i = 0;
+-
+-      for (i = first; i < count; i++) {
+-              rx_ring = &adapter->rx_ring[i];
+-              old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
+-
+-              if (!old_bpf_prog && prog) {
+-                      ena_xdp_register_rxq_info(rx_ring);
+-                      rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+-              } else if (old_bpf_prog && !prog) {
+-                      ena_xdp_unregister_rxq_info(rx_ring);
+-                      rx_ring->rx_headroom = NET_SKB_PAD;
+-              }
+-      }
+-}
+-
+-static void ena_xdp_exchange_program(struct ena_adapter *adapter,
+-                                   struct bpf_prog *prog)
+-{
+-      struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+-
+-      ena_xdp_exchange_program_rx_in_range(adapter,
+-                                           prog,
+-                                           0,
+-                                           adapter->num_io_queues);
+-
+-      if (old_bpf_prog)
+-              bpf_prog_put(old_bpf_prog);
+-}
+-
+-static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
+-{
+-      bool was_up;
+-      int rc;
+-
+-      was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+-
+-      if (was_up)
+-              ena_down(adapter);
+-
+-      adapter->xdp_first_ring = 0;
+-      adapter->xdp_num_queues = 0;
+-      ena_xdp_exchange_program(adapter, NULL);
+-      if (was_up) {
+-              rc = ena_up(adapter);
+-              if (rc)
+-                      return rc;
+-      }
+-      return 0;
+-}
+-
+-static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
+-{
+-      struct ena_adapter *adapter = netdev_priv(netdev);
+-      struct bpf_prog *prog = bpf->prog;
+-      struct bpf_prog *old_bpf_prog;
+-      int rc, prev_mtu;
+-      bool is_up;
+-
+-      is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+-      rc = ena_xdp_allowed(adapter);
+-      if (rc == ENA_XDP_ALLOWED) {
+-              old_bpf_prog = adapter->xdp_bpf_prog;
+-              if (prog) {
+-                      if (!is_up) {
+-                              ena_init_all_xdp_queues(adapter);
+-                      } else if (!old_bpf_prog) {
+-                              ena_down(adapter);
+-                              ena_init_all_xdp_queues(adapter);
+-                      }
+-                      ena_xdp_exchange_program(adapter, prog);
+-
+-                      if (is_up && !old_bpf_prog) {
+-                              rc = ena_up(adapter);
+-                              if (rc)
+-                                      return rc;
+-                      }
+-                      xdp_features_set_redirect_target(netdev, false);
+-              } else if (old_bpf_prog) {
+-                      xdp_features_clear_redirect_target(netdev);
+-                      rc = ena_destroy_and_free_all_xdp_queues(adapter);
+-                      if (rc)
+-                              return rc;
+-              }
+-
+-              prev_mtu = netdev->max_mtu;
+-              netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
+-
+-              if (!old_bpf_prog)
+-                      netif_info(adapter, drv, adapter->netdev,
+-                                 "XDP program is set, changing the max_mtu from %d to %d",
+-                                 prev_mtu, netdev->max_mtu);
+-
+-      } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
+-              netif_err(adapter, drv, adapter->netdev,
+-                        "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
+-                        netdev->mtu, ENA_XDP_MAX_MTU);
+-              NL_SET_ERR_MSG_MOD(bpf->extack,
+-                                 "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
+-              return -EINVAL;
+-      } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
+-              netif_err(adapter, drv, adapter->netdev,
+-                        "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
+-                        adapter->num_io_queues, adapter->max_num_io_queues);
+-              NL_SET_ERR_MSG_MOD(bpf->extack,
+-                                 "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
+-              return -EINVAL;
+-      }
+-
+-      return 0;
+-}
+-
+-/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+- * program as well as to query the current xdp program id.
+- */
+-static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+-{
+-      switch (bpf->command) {
+-      case XDP_SETUP_PROG:
+-              return ena_xdp_set(netdev, bpf);
+-      default:
+-              return -EINVAL;
+-      }
+-      return 0;
+-}
+-
+ static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
+ {
+ #ifdef CONFIG_RFS_ACCEL
+@@ -688,8 +180,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
+       u64_stats_init(&ring->syncp);
+ }
+-static void ena_init_io_rings(struct ena_adapter *adapter,
+-                            int first_index, int count)
++void ena_init_io_rings(struct ena_adapter *adapter,
++                     int first_index, int count)
+ {
+       struct ena_com_dev *ena_dev;
+       struct ena_ring *txr, *rxr;
+@@ -820,9 +312,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+       tx_ring->push_buf_intermediate_buf = NULL;
+ }
+-static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+-                                         int first_index,
+-                                         int count)
++int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
++                                  int first_index, int count)
+ {
+       int i, rc = 0;
+@@ -845,8 +336,8 @@ static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+       return rc;
+ }
+-static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+-                                                int first_index, int count)
++void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
++                                         int first_index, int count)
+ {
+       int i;
+@@ -859,7 +350,7 @@ static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+  *
+  * Free all transmit software resources
+  */
+-static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
++void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+ {
+       ena_free_all_io_tx_resources_in_range(adapter,
+                                             0,
+@@ -1169,8 +660,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
+               ena_free_rx_bufs(adapter, i);
+ }
+-static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+-                            struct ena_tx_buffer *tx_info)
++void ena_unmap_tx_buff(struct ena_ring *tx_ring,
++                     struct ena_tx_buffer *tx_info)
+ {
+       struct ena_com_buf *ena_buf;
+       u32 cnt;
+@@ -1280,8 +771,8 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
+       ena_destroy_all_rx_queues(adapter);
+ }
+-static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+-                               struct ena_tx_buffer *tx_info, bool is_xdp)
++int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
++                        struct ena_tx_buffer *tx_info, bool is_xdp)
+ {
+       if (tx_info)
+               netif_err(ring->adapter,
+@@ -1313,17 +804,6 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+       return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
+ }
+-static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
+-{
+-      struct ena_tx_buffer *tx_info;
+-
+-      tx_info = &xdp_ring->tx_buffer_info[req_id];
+-      if (likely(tx_info->xdpf))
+-              return 0;
+-
+-      return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
+-}
+-
+ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+ {
+       struct netdev_queue *txq;
+@@ -1696,6 +1176,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u
+       return ret;
+ }
++
+ /* ena_clean_rx_irq - Cleanup RX irq
+  * @rx_ring: RX ring to clean
+  * @napi: napi handler
+@@ -1888,8 +1369,8 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
+       rx_ring->per_napi_packets = 0;
+ }
+-static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+-                                      struct ena_ring *rx_ring)
++void ena_unmask_interrupt(struct ena_ring *tx_ring,
++                        struct ena_ring *rx_ring)
+ {
+       u32 rx_interval = tx_ring->smoothed_interval;
+       struct ena_eth_io_intr_reg intr_reg;
+@@ -1921,8 +1402,8 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+       ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
+ }
+-static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+-                                           struct ena_ring *rx_ring)
++void ena_update_ring_numa_node(struct ena_ring *tx_ring,
++                             struct ena_ring *rx_ring)
+ {
+       int cpu = get_cpu();
+       int numa_node;
+@@ -1957,67 +1438,6 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+       put_cpu();
+ }
+-static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+-{
+-      u32 total_done = 0;
+-      u16 next_to_clean;
+-      int tx_pkts = 0;
+-      u16 req_id;
+-      int rc;
+-
+-      if (unlikely(!xdp_ring))
+-              return 0;
+-      next_to_clean = xdp_ring->next_to_clean;
+-
+-      while (tx_pkts < budget) {
+-              struct ena_tx_buffer *tx_info;
+-              struct xdp_frame *xdpf;
+-
+-              rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
+-                                              &req_id);
+-              if (rc) {
+-                      if (unlikely(rc == -EINVAL))
+-                              handle_invalid_req_id(xdp_ring, req_id, NULL,
+-                                                    true);
+-                      break;
+-              }
+-
+-              /* validate that the request id points to a valid xdp_frame */
+-              rc = validate_xdp_req_id(xdp_ring, req_id);
+-              if (rc)
+-                      break;
+-
+-              tx_info = &xdp_ring->tx_buffer_info[req_id];
+-              xdpf = tx_info->xdpf;
+-
+-              tx_info->xdpf = NULL;
+-              tx_info->last_jiffies = 0;
+-              ena_unmap_tx_buff(xdp_ring, tx_info);
+-
+-              netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+-                        "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
+-                        xdpf);
+-
+-              tx_pkts++;
+-              total_done += tx_info->tx_descs;
+-
+-              xdp_return_frame(xdpf);
+-              xdp_ring->free_ids[next_to_clean] = req_id;
+-              next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+-                                                   xdp_ring->ring_size);
+-      }
+-
+-      xdp_ring->next_to_clean = next_to_clean;
+-      ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
+-      ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
+-
+-      netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+-                "tx_poll: q %d done. total pkts: %d\n",
+-                xdp_ring->qid, tx_pkts);
+-
+-      return tx_pkts;
+-}
+-
+ static int ena_io_poll(struct napi_struct *napi, int budget)
+ {
+       struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+@@ -2483,8 +1903,8 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+       return rc;
+ }
+-static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+-                                          int first_index, int count)
++int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
++                                   int first_index, int count)
+ {
+       struct ena_com_dev *ena_dev = adapter->ena_dev;
+       int rc, i;
+@@ -2694,7 +2114,7 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter)
+       }
+ }
+-static int ena_up(struct ena_adapter *adapter)
++int ena_up(struct ena_adapter *adapter)
+ {
+       int io_queue_count, rc, i;
+@@ -2756,7 +2176,7 @@ static int ena_up(struct ena_adapter *adapter)
+       return rc;
+ }
+-static void ena_down(struct ena_adapter *adapter)
++void ena_down(struct ena_adapter *adapter)
+ {
+       int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 33c923e1261a3..041f08d20b450 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -110,19 +110,6 @@
+ #define ENA_MMIO_DISABLE_REG_READ     BIT(0)
+-/* The max MTU size is configured to be the ethernet frame size without
+- * the overhead of the ethernet header, which can have a VLAN header, and
+- * a frame check sequence (FCS).
+- * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
+- */
+-
+-#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN -     \
+-                       VLAN_HLEN - XDP_PACKET_HEADROOM -              \
+-                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+-
+-#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
+-      ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
+-
+ struct ena_irq {
+       irq_handler_t handler;
+       void *data;
+@@ -421,47 +408,44 @@ static inline void ena_reset_device(struct ena_adapter *adapter,
+       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ }
+-enum ena_xdp_errors_t {
+-      ENA_XDP_ALLOWED = 0,
+-      ENA_XDP_CURRENT_MTU_TOO_LARGE,
+-      ENA_XDP_NO_ENOUGH_QUEUES,
+-};
+-
+-enum ENA_XDP_ACTIONS {
+-      ENA_XDP_PASS            = 0,
+-      ENA_XDP_TX              = BIT(0),
+-      ENA_XDP_REDIRECT        = BIT(1),
+-      ENA_XDP_DROP            = BIT(2)
+-};
+-
+-#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
+-
+-static inline bool ena_xdp_present(struct ena_adapter *adapter)
+-{
+-      return !!adapter->xdp_bpf_prog;
+-}
+-
+-static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+-{
+-      return !!ring->xdp_bpf_prog;
+-}
++int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
++                        struct ena_tx_buffer *tx_info, bool is_xdp);
+-static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter,
+-                                           u32 queues)
++/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
++static inline void ena_increase_stat(u64 *statp, u64 cnt,
++                                   struct u64_stats_sync *syncp)
+ {
+-      return 2 * queues <= adapter->max_num_io_queues;
++      u64_stats_update_begin(syncp);
++      (*statp) += cnt;
++      u64_stats_update_end(syncp);
+ }
+-static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
++static inline void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
+ {
+-      enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
+-
+-      if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
+-              rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
+-      else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+-              rc = ENA_XDP_NO_ENOUGH_QUEUES;
+-
+-      return rc;
++      ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
++      ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
+ }
++int ena_xmit_common(struct net_device *dev,
++                  struct ena_ring *ring,
++                  struct ena_tx_buffer *tx_info,
++                  struct ena_com_tx_ctx *ena_tx_ctx,
++                  u16 next_to_use,
++                  u32 bytes);
++void ena_unmap_tx_buff(struct ena_ring *tx_ring,
++                     struct ena_tx_buffer *tx_info);
++void ena_init_io_rings(struct ena_adapter *adapter,
++                     int first_index, int count);
++int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
++                                   int first_index, int count);
++int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
++                                  int first_index, int count);
++void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
++                                         int first_index, int count);
++void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
++void ena_down(struct ena_adapter *adapter);
++int ena_up(struct ena_adapter *adapter);
++void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring);
++void ena_update_ring_numa_node(struct ena_ring *tx_ring,
++                             struct ena_ring *rx_ring);
+ #endif /* !(ENA_H) */
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+new file mode 100644
+index 0000000000000..d0c8a2dc9a676
+--- /dev/null
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+@@ -0,0 +1,465 @@
++// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
++/*
++ * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
++ */
++
++#include "ena_xdp.h"
++
++static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
++{
++      struct ena_tx_buffer *tx_info;
++
++      tx_info = &xdp_ring->tx_buffer_info[req_id];
++      if (likely(tx_info->xdpf))
++              return 0;
++
++      return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
++}
++
++static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
++                              struct ena_tx_buffer *tx_info,
++                              struct xdp_frame *xdpf,
++                              struct ena_com_tx_ctx *ena_tx_ctx)
++{
++      struct ena_adapter *adapter = xdp_ring->adapter;
++      struct ena_com_buf *ena_buf;
++      int push_len = 0;
++      dma_addr_t dma;
++      void *data;
++      u32 size;
++
++      tx_info->xdpf = xdpf;
++      data = tx_info->xdpf->data;
++      size = tx_info->xdpf->len;
++
++      if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
++              /* Designate part of the packet for LLQ */
++              push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
++
++              ena_tx_ctx->push_header = data;
++
++              size -= push_len;
++              data += push_len;
++      }
++
++      ena_tx_ctx->header_len = push_len;
++
++      if (size > 0) {
++              dma = dma_map_single(xdp_ring->dev,
++                                   data,
++                                   size,
++                                   DMA_TO_DEVICE);
++              if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
++                      goto error_report_dma_error;
++
++              tx_info->map_linear_data = 0;
++
++              ena_buf = tx_info->bufs;
++              ena_buf->paddr = dma;
++              ena_buf->len = size;
++
++              ena_tx_ctx->ena_bufs = ena_buf;
++              ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
++      }
++
++      return 0;
++
++error_report_dma_error:
++      ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
++                        &xdp_ring->syncp);
++      netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
++
++      return -EINVAL;
++}
++
++int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
++                     struct net_device *dev,
++                     struct xdp_frame *xdpf,
++                     int flags)
++{
++      struct ena_com_tx_ctx ena_tx_ctx = {};
++      struct ena_tx_buffer *tx_info;
++      u16 next_to_use, req_id;
++      int rc;
++
++      next_to_use = xdp_ring->next_to_use;
++      req_id = xdp_ring->free_ids[next_to_use];
++      tx_info = &xdp_ring->tx_buffer_info[req_id];
++      tx_info->num_of_bufs = 0;
++
++      rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
++      if (unlikely(rc))
++              return rc;
++
++      ena_tx_ctx.req_id = req_id;
++
++      rc = ena_xmit_common(dev,
++                           xdp_ring,
++                           tx_info,
++                           &ena_tx_ctx,
++                           next_to_use,
++                           xdpf->len);
++      if (rc)
++              goto error_unmap_dma;
++
++      /* trigger the dma engine. ena_ring_tx_doorbell()
++       * calls a memory barrier inside it.
++       */
++      if (flags & XDP_XMIT_FLUSH)
++              ena_ring_tx_doorbell(xdp_ring);
++
++      return rc;
++
++error_unmap_dma:
++      ena_unmap_tx_buff(xdp_ring, tx_info);
++      tx_info->xdpf = NULL;
++      return rc;
++}
++
++int ena_xdp_xmit(struct net_device *dev, int n,
++               struct xdp_frame **frames, u32 flags)
++{
++      struct ena_adapter *adapter = netdev_priv(dev);
++      struct ena_ring *xdp_ring;
++      int qid, i, nxmit = 0;
++
++      if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
++              return -EINVAL;
++
++      if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
++              return -ENETDOWN;
++
++      /* We assume that all rings have the same XDP program */
++      if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
++              return -ENXIO;
++
++      qid = smp_processor_id() % adapter->xdp_num_queues;
++      qid += adapter->xdp_first_ring;
++      xdp_ring = &adapter->tx_ring[qid];
++
++      /* Other CPU ids might try to send through this queue */
++      spin_lock(&xdp_ring->xdp_tx_lock);
++
++      for (i = 0; i < n; i++) {
++              if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
++                      break;
++              nxmit++;
++      }
++
++      /* Ring doorbell to make device aware of the packets */
++      if (flags & XDP_XMIT_FLUSH)
++              ena_ring_tx_doorbell(xdp_ring);
++
++      spin_unlock(&xdp_ring->xdp_tx_lock);
++
++      /* Return number of packets sent */
++      return nxmit;
++}
++
++static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
++{
++      adapter->xdp_first_ring = adapter->num_io_queues;
++      adapter->xdp_num_queues = adapter->num_io_queues;
++
++      ena_init_io_rings(adapter,
++                        adapter->xdp_first_ring,
++                        adapter->xdp_num_queues);
++}
++
++int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
++{
++      u32 xdp_first_ring = adapter->xdp_first_ring;
++      u32 xdp_num_queues = adapter->xdp_num_queues;
++      int rc = 0;
++
++      rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
++      if (rc)
++              goto setup_err;
++
++      rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
++      if (rc)
++              goto create_err;
++
++      return 0;
++
++create_err:
++      ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
++setup_err:
++      return rc;
++}
++
++/* Provides a way for both kernel and bpf-prog to know
++ * more about the RX-queue a given XDP frame arrived on.
++ */
++static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
++{
++      int rc;
++
++      rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
++
++      if (rc) {
++              netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
++                        "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
++                        rx_ring->qid, rc);
++              goto err;
++      }
++
++      rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL);
++
++      if (rc) {
++              netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
++                        "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
++                        rx_ring->qid, rc);
++              xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
++      }
++
++err:
++      return rc;
++}
++
++static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
++{
++      xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
++      xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
++}
++
++void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
++                                        struct bpf_prog *prog,
++                                        int first, int count)
++{
++      struct bpf_prog *old_bpf_prog;
++      struct ena_ring *rx_ring;
++      int i = 0;
++
++      for (i = first; i < count; i++) {
++              rx_ring = &adapter->rx_ring[i];
++              old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
++
++              if (!old_bpf_prog && prog) {
++                      ena_xdp_register_rxq_info(rx_ring);
++                      rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
++              } else if (old_bpf_prog && !prog) {
++                      ena_xdp_unregister_rxq_info(rx_ring);
++                      rx_ring->rx_headroom = NET_SKB_PAD;
++              }
++      }
++}
++
++static void ena_xdp_exchange_program(struct ena_adapter *adapter,
++                                   struct bpf_prog *prog)
++{
++      struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
++
++      ena_xdp_exchange_program_rx_in_range(adapter,
++                                           prog,
++                                           0,
++                                           adapter->num_io_queues);
++
++      if (old_bpf_prog)
++              bpf_prog_put(old_bpf_prog);
++}
++
++static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
++{
++      bool was_up;
++      int rc;
++
++      was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
++
++      if (was_up)
++              ena_down(adapter);
++
++      adapter->xdp_first_ring = 0;
++      adapter->xdp_num_queues = 0;
++      ena_xdp_exchange_program(adapter, NULL);
++      if (was_up) {
++              rc = ena_up(adapter);
++              if (rc)
++                      return rc;
++      }
++      return 0;
++}
++
++static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
++{
++      struct ena_adapter *adapter = netdev_priv(netdev);
++      struct bpf_prog *prog = bpf->prog;
++      struct bpf_prog *old_bpf_prog;
++      int rc, prev_mtu;
++      bool is_up;
++
++      is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
++      rc = ena_xdp_allowed(adapter);
++      if (rc == ENA_XDP_ALLOWED) {
++              old_bpf_prog = adapter->xdp_bpf_prog;
++              if (prog) {
++                      if (!is_up) {
++                              ena_init_all_xdp_queues(adapter);
++                      } else if (!old_bpf_prog) {
++                              ena_down(adapter);
++                              ena_init_all_xdp_queues(adapter);
++                      }
++                      ena_xdp_exchange_program(adapter, prog);
++
++                      if (is_up && !old_bpf_prog) {
++                              rc = ena_up(adapter);
++                              if (rc)
++                                      return rc;
++                      }
++                      xdp_features_set_redirect_target(netdev, false);
++              } else if (old_bpf_prog) {
++                      xdp_features_clear_redirect_target(netdev);
++                      rc = ena_destroy_and_free_all_xdp_queues(adapter);
++                      if (rc)
++                              return rc;
++              }
++
++              prev_mtu = netdev->max_mtu;
++              netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
++
++              if (!old_bpf_prog)
++                      netif_info(adapter, drv, adapter->netdev,
++                                 "XDP program is set, changing the max_mtu from %d to %d",
++                                 prev_mtu, netdev->max_mtu);
++
++      } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
++              netif_err(adapter, drv, adapter->netdev,
++                        "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
++                        netdev->mtu, ENA_XDP_MAX_MTU);
++              NL_SET_ERR_MSG_MOD(bpf->extack,
++                                 "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
++              return -EINVAL;
++      } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
++              netif_err(adapter, drv, adapter->netdev,
++                        "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
++                        adapter->num_io_queues, adapter->max_num_io_queues);
++              NL_SET_ERR_MSG_MOD(bpf->extack,
++                                 "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
++              return -EINVAL;
++      }
++
++      return 0;
++}
++
++/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
++ * program as well as to query the current xdp program id.
++ */
++int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
++{
++      switch (bpf->command) {
++      case XDP_SETUP_PROG:
++              return ena_xdp_set(netdev, bpf);
++      default:
++              return -EINVAL;
++      }
++      return 0;
++}
++
++static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
++{
++      u32 total_done = 0;
++      u16 next_to_clean;
++      int tx_pkts = 0;
++      u16 req_id;
++      int rc;
++
++      if (unlikely(!xdp_ring))
++              return 0;
++      next_to_clean = xdp_ring->next_to_clean;
++
++      while (tx_pkts < budget) {
++              struct ena_tx_buffer *tx_info;
++              struct xdp_frame *xdpf;
++
++              rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
++                                              &req_id);
++              if (rc) {
++                      if (unlikely(rc == -EINVAL))
++                              handle_invalid_req_id(xdp_ring, req_id, NULL,
++                                                    true);
++                      break;
++              }
++
++              /* validate that the request id points to a valid xdp_frame */
++              rc = validate_xdp_req_id(xdp_ring, req_id);
++              if (rc)
++                      break;
++
++              tx_info = &xdp_ring->tx_buffer_info[req_id];
++              xdpf = tx_info->xdpf;
++
++              tx_info->xdpf = NULL;
++              tx_info->last_jiffies = 0;
++              ena_unmap_tx_buff(xdp_ring, tx_info);
++
++              netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
++                        "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
++                        xdpf);
++
++              tx_pkts++;
++              total_done += tx_info->tx_descs;
++
++              xdp_return_frame(xdpf);
++              xdp_ring->free_ids[next_to_clean] = req_id;
++              next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
++                                                   xdp_ring->ring_size);
++      }
++
++      xdp_ring->next_to_clean = next_to_clean;
++      ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
++      ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
++
++      netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
++                "tx_poll: q %d done. total pkts: %d\n",
++                xdp_ring->qid, tx_pkts);
++
++      return tx_pkts;
++}
++
++/* This is the XDP napi callback. XDP queues use a different napi callback
++ * than Rx/Tx queues.
++ */
++int ena_xdp_io_poll(struct napi_struct *napi, int budget)
++{
++      struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
++      u32 xdp_work_done, xdp_budget;
++      struct ena_ring *xdp_ring;
++      int napi_comp_call = 0;
++      int ret;
++
++      xdp_ring = ena_napi->xdp_ring;
++
++      xdp_budget = budget;
++
++      if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
++          test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
++              napi_complete_done(napi, 0);
++              return 0;
++      }
++
++      xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
++
++      /* If the device is about to be reset or go down, avoid unmasking
++       * the interrupt and return 0 so NAPI won't reschedule
++       */
++      if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
++              napi_complete_done(napi, 0);
++              ret = 0;
++      } else if (xdp_budget > xdp_work_done) {
++              napi_comp_call = 1;
++              if (napi_complete_done(napi, xdp_work_done))
++                      ena_unmask_interrupt(xdp_ring, NULL);
++              ena_update_ring_numa_node(xdp_ring, NULL);
++              ret = xdp_work_done;
++      } else {
++              ret = xdp_budget;
++      }
++
++      u64_stats_update_begin(&xdp_ring->syncp);
++      xdp_ring->tx_stats.napi_comp += napi_comp_call;
++      xdp_ring->tx_stats.tx_poll++;
++      u64_stats_update_end(&xdp_ring->syncp);
++      xdp_ring->tx_stats.last_napi_jiffies = jiffies;
++
++      return ret;
++}
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+new file mode 100644
+index 0000000000000..80c7496081088
+--- /dev/null
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+@@ -0,0 +1,152 @@
++/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
++/*
++ * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
++ */
++
++#ifndef ENA_XDP_H
++#define ENA_XDP_H
++
++#include "ena_netdev.h"
++#include <linux/bpf_trace.h>
++
++/* The max MTU size is configured to be the ethernet frame size without
++ * the overhead of the ethernet header, which can have a VLAN header, and
++ * a frame check sequence (FCS).
++ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
++ */
++#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN -     \
++                       VLAN_HLEN - XDP_PACKET_HEADROOM -              \
++                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
++
++#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
++      ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
++
++enum ENA_XDP_ACTIONS {
++      ENA_XDP_PASS            = 0,
++      ENA_XDP_TX              = BIT(0),
++      ENA_XDP_REDIRECT        = BIT(1),
++      ENA_XDP_DROP            = BIT(2)
++};
++
++#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
++
++int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter);
++void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
++                                        struct bpf_prog *prog,
++                                        int first, int count);
++int ena_xdp_io_poll(struct napi_struct *napi, int budget);
++int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
++                     struct net_device *dev,
++                     struct xdp_frame *xdpf,
++                     int flags);
++int ena_xdp_xmit(struct net_device *dev, int n,
++               struct xdp_frame **frames, u32 flags);
++int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf);
++
++enum ena_xdp_errors_t {
++      ENA_XDP_ALLOWED = 0,
++      ENA_XDP_CURRENT_MTU_TOO_LARGE,
++      ENA_XDP_NO_ENOUGH_QUEUES,
++};
++
++static inline bool ena_xdp_present(struct ena_adapter *adapter)
++{
++      return !!adapter->xdp_bpf_prog;
++}
++
++static inline bool ena_xdp_present_ring(struct ena_ring *ring)
++{
++      return !!ring->xdp_bpf_prog;
++}
++
++static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter,
++                                           u32 queues)
++{
++      return 2 * queues <= adapter->max_num_io_queues;
++}
++
++static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
++{
++      enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
++
++      if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
++              rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
++      else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
++              rc = ENA_XDP_NO_ENOUGH_QUEUES;
++
++      return rc;
++}
++
++static inline int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
++{
++      u32 verdict = ENA_XDP_PASS;
++      struct bpf_prog *xdp_prog;
++      struct ena_ring *xdp_ring;
++      struct xdp_frame *xdpf;
++      u64 *xdp_stat;
++
++      xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
++
++      if (!xdp_prog)
++              return verdict;
++
++      verdict = bpf_prog_run_xdp(xdp_prog, xdp);
++
++      switch (verdict) {
++      case XDP_TX:
++              xdpf = xdp_convert_buff_to_frame(xdp);
++              if (unlikely(!xdpf)) {
++                      trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
++                      xdp_stat = &rx_ring->rx_stats.xdp_aborted;
++                      verdict = ENA_XDP_DROP;
++                      break;
++              }
++
++              /* Find xmit queue */
++              xdp_ring = rx_ring->xdp_ring;
++
++              /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
++              spin_lock(&xdp_ring->xdp_tx_lock);
++
++              if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
++                                     XDP_XMIT_FLUSH))
++                      xdp_return_frame(xdpf);
++
++              spin_unlock(&xdp_ring->xdp_tx_lock);
++              xdp_stat = &rx_ring->rx_stats.xdp_tx;
++              verdict = ENA_XDP_TX;
++              break;
++      case XDP_REDIRECT:
++              if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
++                      xdp_stat = &rx_ring->rx_stats.xdp_redirect;
++                      verdict = ENA_XDP_REDIRECT;
++                      break;
++              }
++              trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
++              xdp_stat = &rx_ring->rx_stats.xdp_aborted;
++              verdict = ENA_XDP_DROP;
++              break;
++      case XDP_ABORTED:
++              trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
++              xdp_stat = &rx_ring->rx_stats.xdp_aborted;
++              verdict = ENA_XDP_DROP;
++              break;
++      case XDP_DROP:
++              xdp_stat = &rx_ring->rx_stats.xdp_drop;
++              verdict = ENA_XDP_DROP;
++              break;
++      case XDP_PASS:
++              xdp_stat = &rx_ring->rx_stats.xdp_pass;
++              verdict = ENA_XDP_PASS;
++              break;
++      default:
++              bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
++              xdp_stat = &rx_ring->rx_stats.xdp_invalid;
++              verdict = ENA_XDP_DROP;
++      }
++
++      ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
++
++      return verdict;
++}
++#endif /* ENA_XDP_H */
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-pass-ena_adapter-instead-of-net_device-to-en.patch b/queue-6.6/net-ena-pass-ena_adapter-instead-of-net_device-to-en.patch
new file mode 100644 (file)
index 0000000..4f9fe84
--- /dev/null
@@ -0,0 +1,142 @@
+From 78640f5060238fc5a513329cbb8ef00a25691020 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jan 2024 19:08:46 +0000
+Subject: net: ena: Pass ena_adapter instead of net_device to ena_xmit_common()
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 39a044f4dcfee1c776603a6589b6fb98a9e222f2 ]
+
+This change will enable the ability to use ena_xmit_common()
+in functions that don't have a net_device pointer.
+While it can be retrieved by dereferencing
+ena_adapter (adapter->netdev), there's no reason to do it in
+fast path code where this pointer is only needed for
+debug prints.
+
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20240101190855.18739-3-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 36a1ca01f045 ("net: ena: Set tx_info->xdpf value to NULL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 ++++-----
+ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +-
+ drivers/net/ethernet/amazon/ena/ena_xdp.c    | 6 +++---
+ drivers/net/ethernet/amazon/ena/ena_xdp.h    | 4 ++--
+ 4 files changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 1e74386829c42..8868494929c78 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -88,19 +88,18 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu)
+       return ret;
+ }
+-int ena_xmit_common(struct net_device *dev,
++int ena_xmit_common(struct ena_adapter *adapter,
+                   struct ena_ring *ring,
+                   struct ena_tx_buffer *tx_info,
+                   struct ena_com_tx_ctx *ena_tx_ctx,
+                   u16 next_to_use,
+                   u32 bytes)
+ {
+-      struct ena_adapter *adapter = netdev_priv(dev);
+       int rc, nb_hw_desc;
+       if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
+                                               ena_tx_ctx))) {
+-              netif_dbg(adapter, tx_queued, dev,
++              netif_dbg(adapter, tx_queued, adapter->netdev,
+                         "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+                         ring->qid);
+               ena_ring_tx_doorbell(ring);
+@@ -115,7 +114,7 @@ int ena_xmit_common(struct net_device *dev,
+        * ena_com_prepare_tx() are fatal and therefore require a device reset.
+        */
+       if (unlikely(rc)) {
+-              netif_err(adapter, tx_queued, dev,
++              netif_err(adapter, tx_queued, adapter->netdev,
+                         "Failed to prepare tx bufs\n");
+               ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
+                                 &ring->syncp);
+@@ -2607,7 +2606,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+       /* set flags and meta data */
+       ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
+-      rc = ena_xmit_common(dev,
++      rc = ena_xmit_common(adapter,
+                            tx_ring,
+                            tx_info,
+                            &ena_tx_ctx,
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 041f08d20b450..236d1f859a783 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -426,7 +426,7 @@ static inline void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
+       ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
+ }
+-int ena_xmit_common(struct net_device *dev,
++int ena_xmit_common(struct ena_adapter *adapter,
+                   struct ena_ring *ring,
+                   struct ena_tx_buffer *tx_info,
+                   struct ena_com_tx_ctx *ena_tx_ctx,
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+index d0c8a2dc9a676..42370fa027733 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+@@ -73,7 +73,7 @@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+ }
+ int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+-                     struct net_device *dev,
++                     struct ena_adapter *adapter,
+                      struct xdp_frame *xdpf,
+                      int flags)
+ {
+@@ -93,7 +93,7 @@ int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+       ena_tx_ctx.req_id = req_id;
+-      rc = ena_xmit_common(dev,
++      rc = ena_xmit_common(adapter,
+                            xdp_ring,
+                            tx_info,
+                            &ena_tx_ctx,
+@@ -141,7 +141,7 @@ int ena_xdp_xmit(struct net_device *dev, int n,
+       spin_lock(&xdp_ring->xdp_tx_lock);
+       for (i = 0; i < n; i++) {
+-              if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
++              if (ena_xdp_xmit_frame(xdp_ring, adapter, frames[i], 0))
+                       break;
+               nxmit++;
+       }
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+index 80c7496081088..6e472ba6ce1ba 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_xdp.h
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+@@ -36,7 +36,7 @@ void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+                                         int first, int count);
+ int ena_xdp_io_poll(struct napi_struct *napi, int budget);
+ int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+-                     struct net_device *dev,
++                     struct ena_adapter *adapter,
+                      struct xdp_frame *xdpf,
+                      int flags);
+ int ena_xdp_xmit(struct net_device *dev, int n,
+@@ -108,7 +108,7 @@ static inline int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp
+               /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
+               spin_lock(&xdp_ring->xdp_tx_lock);
+-              if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
++              if (ena_xdp_xmit_frame(xdp_ring, rx_ring->adapter, xdpf,
+                                      XDP_XMIT_FLUSH))
+                       xdp_return_frame(xdpf);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-set-tx_info-xdpf-value-to-null.patch b/queue-6.6/net-ena-set-tx_info-xdpf-value-to-null.patch
new file mode 100644 (file)
index 0000000..f8731fd
--- /dev/null
@@ -0,0 +1,71 @@
+From 6c63816c12792528c866b132e8a0c705a8cd80eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Apr 2024 09:13:58 +0000
+Subject: net: ena: Set tx_info->xdpf value to NULL
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 36a1ca01f0452f2549420e7279c2588729bd94df ]
+
+The patch mentioned in the `Fixes` tag removed the explicit assignment
+of tx_info->xdpf to NULL with the justification that there's no need
+to set tx_info->xdpf to NULL and tx_info->num_of_bufs to 0 in case
+of a mapping error. Both values won't be used once the mapping function
+returns an error, and their values would be overridden by the next
+transmitted packet.
+
+While both values do indeed get overridden in the next transmission
+call, the value of tx_info->xdpf is also used to check whether a TX
+descriptor's transmission has been completed (i.e. a completion for it
+was polled).
+
+An example scenario:
+1. Mapping failed, tx_info->xdpf wasn't set to NULL
+2. A VF reset occurred leading to IO resource destruction and
+   a call to ena_free_tx_bufs() function
+3. Although the descriptor whose mapping failed was freed by the
+   transmission function, it still passes the check
+     if (!tx_info->skb)
+
+   (skb and xdp_frame are in a union)
+4. The xdp_frame associated with the descriptor is freed twice
+
+This patch restores the assignment of NULL to tx_info->xdpf so that the
+cleaning function knows that the descriptor is already freed.
+
+Fixes: 504fd6a5390c ("net: ena: fix DMA mapping function issues in XDP")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_xdp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+index 363e361cc5aa8..25de2f511649f 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+       rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
+       if (unlikely(rc))
+-              return rc;
++              goto err;
+       ena_tx_ctx.req_id = req_id;
+@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+ error_unmap_dma:
+       ena_unmap_tx_buff(tx_ring, tx_info);
++err:
+       tx_info->xdpf = NULL;
++
+       return rc;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-use-tx_ring-instead-of-xdp_ring-for-xdp-chan.patch b/queue-6.6/net-ena-use-tx_ring-instead-of-xdp_ring-for-xdp-chan.patch
new file mode 100644 (file)
index 0000000..7eb2ccf
--- /dev/null
@@ -0,0 +1,391 @@
+From a6e7f4b37cc040cc8bcd742d3a4b02829493e26e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jan 2024 19:08:49 +0000
+Subject: net: ena: Use tx_ring instead of xdp_ring for XDP channel TX
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 911a8c960110b03ed519ce43ea6c9990a0ee0ceb ]
+
+When an XDP program is loaded the existing channels in the driver split
+into two halves:
+- The first half of the channels contain RX and TX rings, these queues
+  are used for receiving traffic and sending packets originating from
+  kernel.
+- The second half of the channels contain only a TX ring. These queues
+  are used for sending packets that were redirected using XDP_TX
+  or XDP_REDIRECT.
+
+Referring to the queues in the second half of the channels as "xdp_ring"
+can be confusing and may give the impression that ENA has the capability
+to generate an additional special queue.
+
+This patch ensures that the xdp_ring field is exclusively used to
+describe the XDP TX queue that a specific RX queue needs to utilize when
+forwarding packets with XDP TX and XDP REDIRECT, preserving the
+integrity of the xdp_ring field in ena_ring.
+
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20240101190855.18739-6-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 36a1ca01f045 ("net: ena: Set tx_info->xdpf value to NULL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c |  12 +-
+ drivers/net/ethernet/amazon/ena/ena_netdev.h |   1 -
+ drivers/net/ethernet/amazon/ena/ena_xdp.c    | 111 +++++++++----------
+ drivers/net/ethernet/amazon/ena/ena_xdp.h    |   2 +-
+ 4 files changed, 61 insertions(+), 65 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 8868494929c78..b239e473d59fa 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1753,8 +1753,8 @@ static void ena_del_napi_in_range(struct ena_adapter *adapter,
+       for (i = first_index; i < first_index + count; i++) {
+               netif_napi_del(&adapter->ena_napi[i].napi);
+-              WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
+-                      adapter->ena_napi[i].xdp_ring);
++              WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
++                      adapter->ena_napi[i].rx_ring);
+       }
+ }
+@@ -1769,12 +1769,10 @@ static void ena_init_napi_in_range(struct ena_adapter *adapter,
+               netif_napi_add(adapter->netdev, &napi->napi,
+                              ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll);
+-              if (!ENA_IS_XDP_INDEX(adapter, i)) {
++              if (!ENA_IS_XDP_INDEX(adapter, i))
+                       napi->rx_ring = &adapter->rx_ring[i];
+-                      napi->tx_ring = &adapter->tx_ring[i];
+-              } else {
+-                      napi->xdp_ring = &adapter->tx_ring[i];
+-              }
++
++              napi->tx_ring = &adapter->tx_ring[i];
+               napi->qid = i;
+       }
+ }
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+index 236d1f859a783..b364febab011e 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
+@@ -125,7 +125,6 @@ struct ena_napi {
+       struct napi_struct napi;
+       struct ena_ring *tx_ring;
+       struct ena_ring *rx_ring;
+-      struct ena_ring *xdp_ring;
+       u32 qid;
+       struct dim dim;
+ };
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+index 42370fa027733..363e361cc5aa8 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
+@@ -5,23 +5,23 @@
+ #include "ena_xdp.h"
+-static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
++static int validate_xdp_req_id(struct ena_ring *tx_ring, u16 req_id)
+ {
+       struct ena_tx_buffer *tx_info;
+-      tx_info = &xdp_ring->tx_buffer_info[req_id];
++      tx_info = &tx_ring->tx_buffer_info[req_id];
+       if (likely(tx_info->xdpf))
+               return 0;
+-      return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
++      return handle_invalid_req_id(tx_ring, req_id, tx_info, true);
+ }
+-static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
++static int ena_xdp_tx_map_frame(struct ena_ring *tx_ring,
+                               struct ena_tx_buffer *tx_info,
+                               struct xdp_frame *xdpf,
+                               struct ena_com_tx_ctx *ena_tx_ctx)
+ {
+-      struct ena_adapter *adapter = xdp_ring->adapter;
++      struct ena_adapter *adapter = tx_ring->adapter;
+       struct ena_com_buf *ena_buf;
+       int push_len = 0;
+       dma_addr_t dma;
+@@ -32,9 +32,9 @@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+       data = tx_info->xdpf->data;
+       size = tx_info->xdpf->len;
+-      if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
++      if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               /* Designate part of the packet for LLQ */
+-              push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
++              push_len = min_t(u32, size, tx_ring->tx_max_header_size);
+               ena_tx_ctx->push_header = data;
+@@ -45,11 +45,11 @@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+       ena_tx_ctx->header_len = push_len;
+       if (size > 0) {
+-              dma = dma_map_single(xdp_ring->dev,
++              dma = dma_map_single(tx_ring->dev,
+                                    data,
+                                    size,
+                                    DMA_TO_DEVICE);
+-              if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
++              if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+                       goto error_report_dma_error;
+               tx_info->map_linear_data = 0;
+@@ -65,14 +65,14 @@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
+       return 0;
+ error_report_dma_error:
+-      ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
+-                        &xdp_ring->syncp);
++      ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
++                        &tx_ring->syncp);
+       netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+       return -EINVAL;
+ }
+-int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
++int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+                      struct ena_adapter *adapter,
+                      struct xdp_frame *xdpf,
+                      int flags)
+@@ -82,19 +82,19 @@ int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+       u16 next_to_use, req_id;
+       int rc;
+-      next_to_use = xdp_ring->next_to_use;
+-      req_id = xdp_ring->free_ids[next_to_use];
+-      tx_info = &xdp_ring->tx_buffer_info[req_id];
++      next_to_use = tx_ring->next_to_use;
++      req_id = tx_ring->free_ids[next_to_use];
++      tx_info = &tx_ring->tx_buffer_info[req_id];
+       tx_info->num_of_bufs = 0;
+-      rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
++      rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
+       if (unlikely(rc))
+               return rc;
+       ena_tx_ctx.req_id = req_id;
+       rc = ena_xmit_common(adapter,
+-                           xdp_ring,
++                           tx_ring,
+                            tx_info,
+                            &ena_tx_ctx,
+                            next_to_use,
+@@ -106,12 +106,12 @@ int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+        * calls a memory barrier inside it.
+        */
+       if (flags & XDP_XMIT_FLUSH)
+-              ena_ring_tx_doorbell(xdp_ring);
++              ena_ring_tx_doorbell(tx_ring);
+       return rc;
+ error_unmap_dma:
+-      ena_unmap_tx_buff(xdp_ring, tx_info);
++      ena_unmap_tx_buff(tx_ring, tx_info);
+       tx_info->xdpf = NULL;
+       return rc;
+ }
+@@ -120,7 +120,7 @@ int ena_xdp_xmit(struct net_device *dev, int n,
+                struct xdp_frame **frames, u32 flags)
+ {
+       struct ena_adapter *adapter = netdev_priv(dev);
+-      struct ena_ring *xdp_ring;
++      struct ena_ring *tx_ring;
+       int qid, i, nxmit = 0;
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+@@ -135,22 +135,22 @@ int ena_xdp_xmit(struct net_device *dev, int n,
+       qid = smp_processor_id() % adapter->xdp_num_queues;
+       qid += adapter->xdp_first_ring;
+-      xdp_ring = &adapter->tx_ring[qid];
++      tx_ring = &adapter->tx_ring[qid];
+       /* Other CPU ids might try to send thorugh this queue */
+-      spin_lock(&xdp_ring->xdp_tx_lock);
++      spin_lock(&tx_ring->xdp_tx_lock);
+       for (i = 0; i < n; i++) {
+-              if (ena_xdp_xmit_frame(xdp_ring, adapter, frames[i], 0))
++              if (ena_xdp_xmit_frame(tx_ring, adapter, frames[i], 0))
+                       break;
+               nxmit++;
+       }
+       /* Ring doorbell to make device aware of the packets */
+       if (flags & XDP_XMIT_FLUSH)
+-              ena_ring_tx_doorbell(xdp_ring);
++              ena_ring_tx_doorbell(tx_ring);
+-      spin_unlock(&xdp_ring->xdp_tx_lock);
++      spin_unlock(&tx_ring->xdp_tx_lock);
+       /* Return number of packets sent */
+       return nxmit;
+@@ -355,7 +355,7 @@ int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+       return 0;
+ }
+-static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
++static int ena_clean_xdp_irq(struct ena_ring *tx_ring, u32 budget)
+ {
+       u32 total_done = 0;
+       u16 next_to_clean;
+@@ -363,55 +363,54 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+       u16 req_id;
+       int rc;
+-      if (unlikely(!xdp_ring))
++      if (unlikely(!tx_ring))
+               return 0;
+-      next_to_clean = xdp_ring->next_to_clean;
++      next_to_clean = tx_ring->next_to_clean;
+       while (tx_pkts < budget) {
+               struct ena_tx_buffer *tx_info;
+               struct xdp_frame *xdpf;
+-              rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
++              rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+                                               &req_id);
+               if (rc) {
+                       if (unlikely(rc == -EINVAL))
+-                              handle_invalid_req_id(xdp_ring, req_id, NULL,
+-                                                    true);
++                              handle_invalid_req_id(tx_ring, req_id, NULL, true);
+                       break;
+               }
+               /* validate that the request id points to a valid xdp_frame */
+-              rc = validate_xdp_req_id(xdp_ring, req_id);
++              rc = validate_xdp_req_id(tx_ring, req_id);
+               if (rc)
+                       break;
+-              tx_info = &xdp_ring->tx_buffer_info[req_id];
++              tx_info = &tx_ring->tx_buffer_info[req_id];
+               xdpf = tx_info->xdpf;
+               tx_info->xdpf = NULL;
+               tx_info->last_jiffies = 0;
+-              ena_unmap_tx_buff(xdp_ring, tx_info);
++              ena_unmap_tx_buff(tx_ring, tx_info);
+-              netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+-                        "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
++              netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
++                        "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+                         xdpf);
+               tx_pkts++;
+               total_done += tx_info->tx_descs;
+               xdp_return_frame(xdpf);
+-              xdp_ring->free_ids[next_to_clean] = req_id;
++              tx_ring->free_ids[next_to_clean] = req_id;
+               next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+-                                                   xdp_ring->ring_size);
++                                                   tx_ring->ring_size);
+       }
+-      xdp_ring->next_to_clean = next_to_clean;
+-      ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
+-      ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
++      tx_ring->next_to_clean = next_to_clean;
++      ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
++      ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+-      netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
++      netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+                 "tx_poll: q %d done. total pkts: %d\n",
+-                xdp_ring->qid, tx_pkts);
++                tx_ring->qid, tx_pkts);
+       return tx_pkts;
+ }
+@@ -423,43 +422,43 @@ int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+ {
+       struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+       u32 xdp_work_done, xdp_budget;
+-      struct ena_ring *xdp_ring;
++      struct ena_ring *tx_ring;
+       int napi_comp_call = 0;
+       int ret;
+-      xdp_ring = ena_napi->xdp_ring;
++      tx_ring = ena_napi->tx_ring;
+       xdp_budget = budget;
+-      if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
+-          test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
++      if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
++          test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
+               napi_complete_done(napi, 0);
+               return 0;
+       }
+-      xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
++      xdp_work_done = ena_clean_xdp_irq(tx_ring, xdp_budget);
+       /* If the device is about to reset or down, avoid unmask
+        * the interrupt and return 0 so NAPI won't reschedule
+        */
+-      if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
++      if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags))) {
+               napi_complete_done(napi, 0);
+               ret = 0;
+       } else if (xdp_budget > xdp_work_done) {
+               napi_comp_call = 1;
+               if (napi_complete_done(napi, xdp_work_done))
+-                      ena_unmask_interrupt(xdp_ring, NULL);
+-              ena_update_ring_numa_node(xdp_ring, NULL);
++                      ena_unmask_interrupt(tx_ring, NULL);
++              ena_update_ring_numa_node(tx_ring, NULL);
+               ret = xdp_work_done;
+       } else {
+               ret = xdp_budget;
+       }
+-      u64_stats_update_begin(&xdp_ring->syncp);
+-      xdp_ring->tx_stats.napi_comp += napi_comp_call;
+-      xdp_ring->tx_stats.tx_poll++;
+-      u64_stats_update_end(&xdp_ring->syncp);
+-      xdp_ring->tx_stats.last_napi_jiffies = jiffies;
++      u64_stats_update_begin(&tx_ring->syncp);
++      tx_ring->tx_stats.napi_comp += napi_comp_call;
++      tx_ring->tx_stats.tx_poll++;
++      u64_stats_update_end(&tx_ring->syncp);
++      tx_ring->tx_stats.last_napi_jiffies = jiffies;
+       return ret;
+ }
+diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+index 6e472ba6ce1ba..3fa8e80b18a9e 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_xdp.h
++++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h
+@@ -35,7 +35,7 @@ void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+                                         struct bpf_prog *prog,
+                                         int first, int count);
+ int ena_xdp_io_poll(struct napi_struct *napi, int budget);
+-int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
++int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+                      struct ena_adapter *adapter,
+                      struct xdp_frame *xdpf,
+                      int flags);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-wrong-missing-io-completions-check-order.patch b/queue-6.6/net-ena-wrong-missing-io-completions-check-order.patch
new file mode 100644 (file)
index 0000000..6b500f1
--- /dev/null
@@ -0,0 +1,108 @@
+From ad8cc1f690e27f7742ceb57e035ebb98d03dbc79 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Apr 2024 09:13:56 +0000
+Subject: net: ena: Wrong missing IO completions check order
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit f7e417180665234fdb7af2ebe33d89aaa434d16f ]
+
+Missing IO completions check is called every second (HZ jiffies).
+This commit fixes several issues with this check:
+
+1. Duplicate queues check:
+   Max of 4 queues are scanned on each check due to monitor budget.
+   Once reaching the budget, this check exits under the assumption that
+   the next check will continue to scan the remainder of the queues,
+   but in practice, next check will first scan the last already scanned
+   queue which is not necessary and may cause the full queue scan to
+   last a couple of seconds longer.
+   The fix is to start every check with the next queue to scan.
+   For example, on 8 IO queues:
+   Bug: [0,1,2,3], [3,4,5,6], [6,7]
+   Fix: [0,1,2,3], [4,5,6,7]
+
+2. Unbalanced queues check:
+   In case the number of active IO queues is not a multiple of budget,
+   there will be checks which don't utilize the full budget
+   because the full scan exits when reaching the last queue id.
+   The fix is to run every TX completion check with exact queue budget
+   regardless of the queue id.
+   For example, on 7 IO queues:
+   Bug: [0,1,2,3], [4,5,6], [0,1,2,3]
+   Fix: [0,1,2,3], [4,5,6,0], [1,2,3,4]
+   The budget may be lowered in case the number of IO queues is less
+   than the budget (4) to make sure there are no duplicate queues on
+   the same check.
+   For example, on 3 IO queues:
+   Bug: [0,1,2,0], [1,2,0,1]
+   Fix: [0,1,2], [0,1,2]
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Amit Bernstein <amitbern@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 +++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index cc39707a80598..5178eb089eabe 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -3982,10 +3982,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
+ {
+       struct ena_ring *tx_ring;
+       struct ena_ring *rx_ring;
+-      int i, budget, rc;
++      int qid, budget, rc;
+       int io_queue_count;
+       io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
++
+       /* Make sure the driver doesn't turn the device in other process */
+       smp_rmb();
+@@ -3998,27 +3999,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
+       if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
+               return;
+-      budget = ENA_MONITORED_TX_QUEUES;
++      budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
+-      for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
+-              tx_ring = &adapter->tx_ring[i];
+-              rx_ring = &adapter->rx_ring[i];
++      qid = adapter->last_monitored_tx_qid;
++
++      while (budget) {
++              qid = (qid + 1) % io_queue_count;
++
++              tx_ring = &adapter->tx_ring[qid];
++              rx_ring = &adapter->rx_ring[qid];
+               rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
+               if (unlikely(rc))
+                       return;
+-              rc =  !ENA_IS_XDP_INDEX(adapter, i) ?
++              rc =  !ENA_IS_XDP_INDEX(adapter, qid) ?
+                       check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
+               if (unlikely(rc))
+                       return;
+               budget--;
+-              if (!budget)
+-                      break;
+       }
+-      adapter->last_monitored_tx_qid = i % io_queue_count;
++      adapter->last_monitored_tx_qid = qid;
+ }
+ /* trigger napi schedule after 2 consecutive detections */
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ks8851-handle-softirqs-at-the-end-of-irq-thread-.patch b/queue-6.6/net-ks8851-handle-softirqs-at-the-end-of-irq-thread-.patch
new file mode 100644 (file)
index 0000000..6b1e96d
--- /dev/null
@@ -0,0 +1,101 @@
+From ed4c3ca7c9d96274bb5dd5be39cf118869f8b4dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 22:30:40 +0200
+Subject: net: ks8851: Handle softirqs at the end of IRQ thread to fix hang
+
+From: Marek Vasut <marex@denx.de>
+
+[ Upstream commit be0384bf599cf1eb8d337517feeb732d71f75a6f ]
+
+The ks8851_irq() thread may call ks8851_rx_pkts() in case there are
+any packets in the MAC FIFO, which calls netif_rx(). This netif_rx()
+implementation is guarded by local_bh_disable() and local_bh_enable().
+The local_bh_enable() may call do_softirq() to run softirqs in case
+any are pending. One of the softirqs is net_rx_action, which ultimately
+reaches the driver .start_xmit callback. If that happens, the system
+hangs. The entire call chain is below:
+
+ks8851_start_xmit_par from netdev_start_xmit
+netdev_start_xmit from dev_hard_start_xmit
+dev_hard_start_xmit from sch_direct_xmit
+sch_direct_xmit from __dev_queue_xmit
+__dev_queue_xmit from __neigh_update
+__neigh_update from neigh_update
+neigh_update from arp_process.constprop.0
+arp_process.constprop.0 from __netif_receive_skb_one_core
+__netif_receive_skb_one_core from process_backlog
+process_backlog from __napi_poll.constprop.0
+__napi_poll.constprop.0 from net_rx_action
+net_rx_action from __do_softirq
+__do_softirq from call_with_stack
+call_with_stack from do_softirq
+do_softirq from __local_bh_enable_ip
+__local_bh_enable_ip from netif_rx
+netif_rx from ks8851_irq
+ks8851_irq from irq_thread_fn
+irq_thread_fn from irq_thread
+irq_thread from kthread
+kthread from ret_from_fork
+
+The hang happens because ks8851_irq() first locks a spinlock in
+ks8851_par.c ks8851_lock_par() spin_lock_irqsave(&ksp->lock, ...)
+and with that spinlock locked, calls netif_rx(). Once the execution
+reaches ks8851_start_xmit_par(), it calls ks8851_lock_par() again
+which attempts to claim the already locked spinlock again, and the
+hang happens.
+
+Move the do_softirq() call outside of the spinlock protected section
+of ks8851_irq() by disabling BHs around the entire spinlock protected
+section of ks8851_irq() handler. Place local_bh_enable() outside of
+the spinlock protected section, so that it can trigger do_softirq()
+without the ks8851_par.c ks8851_lock_par() spinlock being held, and
+safely call ks8851_start_xmit_par() without attempting to lock the
+already locked spinlock.
+
+Since ks8851_irq() is protected by local_bh_disable()/local_bh_enable()
+now, replace netif_rx() with __netif_rx() which is not duplicating the
+local_bh_disable()/local_bh_enable() calls.
+
+Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations")
+Signed-off-by: Marek Vasut <marex@denx.de>
+Link: https://lore.kernel.org/r/20240405203204.82062-2-marex@denx.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/micrel/ks8851_common.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
+index 896d43bb8883d..d4cdf3d4f5525 100644
+--- a/drivers/net/ethernet/micrel/ks8851_common.c
++++ b/drivers/net/ethernet/micrel/ks8851_common.c
+@@ -299,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
+                                       ks8851_dbg_dumpkkt(ks, rxpkt);
+                               skb->protocol = eth_type_trans(skb, ks->netdev);
+-                              netif_rx(skb);
++                              __netif_rx(skb);
+                               ks->netdev->stats.rx_packets++;
+                               ks->netdev->stats.rx_bytes += rxlen;
+@@ -330,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
+       unsigned long flags;
+       unsigned int status;
++      local_bh_disable();
++
+       ks8851_lock(ks, &flags);
+       status = ks8851_rdreg16(ks, KS_ISR);
+@@ -406,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
+       if (status & IRQ_LCI)
+               mii_check_link(&ks->mii);
++      local_bh_enable();
++
+       return IRQ_HANDLED;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ks8851-inline-ks8851_rx_skb.patch b/queue-6.6/net-ks8851-inline-ks8851_rx_skb.patch
new file mode 100644 (file)
index 0000000..601dff6
--- /dev/null
@@ -0,0 +1,138 @@
+From 09b8a234d0df98ee5f45bbe69a6a9e674d09fae1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 22:30:39 +0200
+Subject: net: ks8851: Inline ks8851_rx_skb()
+
+From: Marek Vasut <marex@denx.de>
+
+[ Upstream commit f96f700449b6d190e06272f1cf732ae8e45b73df ]
+
+Both ks8851_rx_skb_par() and ks8851_rx_skb_spi() call netif_rx(skb),
+inline the netif_rx(skb) call directly into ks8851_common.c and drop
+the .rx_skb callback and ks8851_rx_skb() wrapper. This removes one
+indirect call from the driver, no functional change otherwise.
+
+Signed-off-by: Marek Vasut <marex@denx.de>
+Link: https://lore.kernel.org/r/20240405203204.82062-1-marex@denx.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: be0384bf599c ("net: ks8851: Handle softirqs at the end of IRQ thread to fix hang")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/micrel/ks8851.h        |  3 ---
+ drivers/net/ethernet/micrel/ks8851_common.c | 12 +-----------
+ drivers/net/ethernet/micrel/ks8851_par.c    | 11 -----------
+ drivers/net/ethernet/micrel/ks8851_spi.c    | 11 -----------
+ 4 files changed, 1 insertion(+), 36 deletions(-)
+
+diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
+index e5ec0a363aff8..31f75b4a67fd7 100644
+--- a/drivers/net/ethernet/micrel/ks8851.h
++++ b/drivers/net/ethernet/micrel/ks8851.h
+@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
+  * @rdfifo: FIFO read callback
+  * @wrfifo: FIFO write callback
+  * @start_xmit: start_xmit() implementation callback
+- * @rx_skb: rx_skb() implementation callback
+  * @flush_tx_work: flush_tx_work() implementation callback
+  *
+  * The @statelock is used to protect information in the structure which may
+@@ -423,8 +422,6 @@ struct ks8851_net {
+                                         struct sk_buff *txp, bool irq);
+       netdev_tx_t             (*start_xmit)(struct sk_buff *skb,
+                                             struct net_device *dev);
+-      void                    (*rx_skb)(struct ks8851_net *ks,
+-                                        struct sk_buff *skb);
+       void                    (*flush_tx_work)(struct ks8851_net *ks);
+ };
+diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
+index 0bf13b38b8f5b..896d43bb8883d 100644
+--- a/drivers/net/ethernet/micrel/ks8851_common.c
++++ b/drivers/net/ethernet/micrel/ks8851_common.c
+@@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
+                  rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
+ }
+-/**
+- * ks8851_rx_skb - receive skbuff
+- * @ks: The device state.
+- * @skb: The skbuff
+- */
+-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
+-{
+-      ks->rx_skb(ks, skb);
+-}
+-
+ /**
+  * ks8851_rx_pkts - receive packets from the host
+  * @ks: The device information.
+@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
+                                       ks8851_dbg_dumpkkt(ks, rxpkt);
+                               skb->protocol = eth_type_trans(skb, ks->netdev);
+-                              ks8851_rx_skb(ks, skb);
++                              netif_rx(skb);
+                               ks->netdev->stats.rx_packets++;
+                               ks->netdev->stats.rx_bytes += rxlen;
+diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
+index 7f49042484bdc..96fb0ffcedb90 100644
+--- a/drivers/net/ethernet/micrel/ks8851_par.c
++++ b/drivers/net/ethernet/micrel/ks8851_par.c
+@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
+       iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
+ }
+-/**
+- * ks8851_rx_skb_par - receive skbuff
+- * @ks: The device state.
+- * @skb: The skbuff
+- */
+-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
+-{
+-      netif_rx(skb);
+-}
+-
+ static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
+ {
+       return ks8851_rdreg16_par(ks, KS_TXQCR);
+@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
+       ks->rdfifo = ks8851_rdfifo_par;
+       ks->wrfifo = ks8851_wrfifo_par;
+       ks->start_xmit = ks8851_start_xmit_par;
+-      ks->rx_skb = ks8851_rx_skb_par;
+ #define STD_IRQ (IRQ_LCI |    /* Link Change */       \
+                IRQ_RXI |      /* RX done */           \
+diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
+index 88e26c120b483..4dcbff789b19d 100644
+--- a/drivers/net/ethernet/micrel/ks8851_spi.c
++++ b/drivers/net/ethernet/micrel/ks8851_spi.c
+@@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len)
+       return ALIGN(len + 4, 4);
+ }
+-/**
+- * ks8851_rx_skb_spi - receive skbuff
+- * @ks: The device state
+- * @skb: The skbuff
+- */
+-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
+-{
+-      netif_rx(skb);
+-}
+-
+ /**
+  * ks8851_tx_work - process tx packet(s)
+  * @work: The work strucutre what was scheduled.
+@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
+       ks->rdfifo = ks8851_rdfifo_spi;
+       ks->wrfifo = ks8851_wrfifo_spi;
+       ks->start_xmit = ks8851_start_xmit_spi;
+-      ks->rx_skb = ks8851_rx_skb_spi;
+       ks->flush_tx_work = ks8851_flush_tx_work_spi;
+ #define STD_IRQ (IRQ_LCI |    /* Link Change */       \
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-correctly-compare-pkt-reformat-ids.patch b/queue-6.6/net-mlx5-correctly-compare-pkt-reformat-ids.patch
new file mode 100644 (file)
index 0000000..c921989
--- /dev/null
@@ -0,0 +1,74 @@
+From 45ee88b36c4e917668c42a859d83b21be7b2fc6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:13 +0300
+Subject: net/mlx5: Correctly compare pkt reformat ids
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit 9eca93f4d5ab03905516a68683674d9c50ff95bd ]
+
+struct mlx5_pkt_reformat contains a naked union of a u32 id and a
+dr_action pointer which is used when the action is SW-managed (when
+pkt_reformat.owner is set to MLX5_FLOW_RESOURCE_OWNER_SW). Using id
+directly in that case is incorrect, as it maps to the least significant
+32 bits of the 64-bit pointer in mlx5_fs_dr_action and not to the pkt
+reformat id allocated in firmware.
+
+For the purpose of comparing whether two rules are identical,
+interpreting the least significant 32 bits of the mlx5_fs_dr_action
+pointer as an id mostly works... until it breaks horribly and produces
+the outcome described in [1].
+
+This patch fixes mlx5_flow_dests_cmp to correctly compare ids using
+mlx5_fs_dr_action_get_pkt_reformat_id for the SW-managed rules.
+
+Link: https://lore.kernel.org/netdev/ea5264d6-6b55-4449-a602-214c6f509c1e@163.com/T/#u [1]
+
+Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation")
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-6-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index 54f535d5d250f..e2f7cecce6f1a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
+       return err;
+ }
++static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
++                                struct mlx5_pkt_reformat *p2)
++{
++      return p1->owner == p2->owner &&
++              (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
++               p1->id == p2->id :
++               mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
++               mlx5_fs_dr_action_get_pkt_reformat_id(p2));
++}
++
+ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+                               struct mlx5_flow_destination *d2)
+ {
+@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+                    ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
+                     (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
+                    ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+-                    (d1->vport.pkt_reformat->id ==
+-                     d2->vport.pkt_reformat->id) : true)) ||
++                    mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
++                                          d2->vport.pkt_reformat) : true)) ||
+                   (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+                    d1->ft == d2->ft) ||
+                   (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-offset-comp-irq-index-in-name-by-one.patch b/queue-6.6/net-mlx5-offset-comp-irq-index-in-name-by-one.patch
new file mode 100644 (file)
index 0000000..03cef94
--- /dev/null
@@ -0,0 +1,62 @@
+From 4d145c20ce04b3de9db5c11fdce0627ed6d6d17b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:11 +0300
+Subject: net/mlx5: offset comp irq index in name by one
+
+From: Michael Liang <mliang@purestorage.com>
+
+[ Upstream commit 9f7e8fbb91f8fa29548e2f6ab50c03b628c67ede ]
+
+The mlx5 comp irq name scheme is changed a little bit between
+commit 3663ad34bc70 ("net/mlx5: Shift control IRQ to the last index")
+and commit 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation").
+The index in the comp irq name used to start from 0 but now it starts
+from 1. There is nothing critical here, but it's harmless to change
+back to the old behavior, i.e. starting from 0.
+
+Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation")
+Reviewed-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Reviewed-by: Yuanyuan Zhong <yzhong@purestorage.com>
+Signed-off-by: Michael Liang <mliang@purestorage.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-4-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index 4dcf995cb1a20..6bac8ad70ba60 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -19,6 +19,7 @@
+ #define MLX5_IRQ_CTRL_SF_MAX 8
+ /* min num of vectors for SFs to be enabled */
+ #define MLX5_IRQ_VEC_COMP_BASE_SF 2
++#define MLX5_IRQ_VEC_COMP_BASE 1
+ #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+ #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+               return;
+       }
++      vecidx -= MLX5_IRQ_VEC_COMP_BASE;
+       snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
+ }
+@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
+       struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
+       struct mlx5_irq_pool *pool = table->pcif_pool;
+       struct irq_affinity_desc af_desc;
+-      int offset = 1;
++      int offset = MLX5_IRQ_VEC_COMP_BASE;
+       if (!pool->xa_num_irqs.max)
+               offset = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch b/queue-6.6/net-mlx5-properly-link-new-fs-rules-into-the-tree.patch
new file mode 100644 (file)
index 0000000..8838280
--- /dev/null
@@ -0,0 +1,66 @@
+From 74c487805e9ccb04fb77e3ad787518065e42e86b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:12 +0300
+Subject: net/mlx5: Properly link new fs rules into the tree
+
+From: Cosmin Ratiu <cratiu@nvidia.com>
+
+[ Upstream commit 7c6782ad4911cbee874e85630226ed389ff2e453 ]
+
+Previously, add_rule_fg would only add newly created rules from the
+handle into the tree when they had a refcount of 1. On the other hand,
+create_flow_handle tries hard to find and reference already existing
+identical rules instead of creating new ones.
+
+These two behaviors can result in a situation where create_flow_handle
+1) creates a new rule and references it, then
+2) in a subsequent step during the same handle creation references it
+   again,
+resulting in a rule with a refcount of 2 that is not linked into the
+tree, will have a NULL parent and root and will result in a crash when
+the flow group is deleted because del_sw_hw_rule, invoked on rule
+deletion, assumes node->parent is != NULL.
+
+This happened in the wild, due to another bug related to incorrect
+handling of duplicate pkt_reformat ids, which led to the code in
+create_flow_handle incorrectly referencing a just-added rule in the same
+flow handle, resulting in the problem described above. Full details are
+at [1].
+
+This patch changes add_rule_fg to add new rules without parents into
+the tree, properly initializing them and avoiding the crash. This makes
+it more consistent with how rules are added to an FTE in
+create_flow_handle.
+
+Fixes: 74491de93712 ("net/mlx5: Add multi dest support")
+Link: https://lore.kernel.org/netdev/ea5264d6-6b55-4449-a602-214c6f509c1e@163.com/T/#u [1]
+Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-5-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index a13b9c2bd144b..54f535d5d250f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -1808,8 +1808,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+       }
+       trace_mlx5_fs_set_fte(fte, false);
++      /* Link newly added rules into the tree. */
+       for (i = 0; i < handle->num_rules; i++) {
+-              if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
++              if (!handle->rule[i]->node.parent) {
+                       tree_add_node(&handle->rule[i]->node, &fte->node);
+                       trace_mlx5_fs_add_rule(handle->rule[i]);
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-register-devlink-first-under-devlink-lock.patch b/queue-6.6/net-mlx5-register-devlink-first-under-devlink-lock.patch
new file mode 100644 (file)
index 0000000..b80f375
--- /dev/null
@@ -0,0 +1,180 @@
+From 53a056acec6bf34681f2b85b85493bf67d8ab6d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:10 +0300
+Subject: net/mlx5: Register devlink first under devlink lock
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit c6e77aa9dd82bc18a89bf49418f8f7e961cfccc8 ]
+
+If the device has a non-fatal FW error during probe, the
+driver will report the error to user via devlink. This will trigger
+a WARN_ON, since mlx5 is calling devlink_register() last.
+In order to avoid the WARN_ON[1], change mlx5 to invoke devl_register()
+first under devlink lock.
+
+[1]
+WARNING: CPU: 5 PID: 227 at net/devlink/health.c:483 devlink_recover_notify.constprop.0+0xb8/0xc0
+CPU: 5 PID: 227 Comm: kworker/u16:3 Not tainted 6.4.0-rc5_for_upstream_min_debug_2023_06_12_12_38 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+Workqueue: mlx5_health0000:08:00.0 mlx5_fw_reporter_err_work [mlx5_core]
+RIP: 0010:devlink_recover_notify.constprop.0+0xb8/0xc0
+Call Trace:
+ <TASK>
+ ? __warn+0x79/0x120
+ ? devlink_recover_notify.constprop.0+0xb8/0xc0
+ ? report_bug+0x17c/0x190
+ ? handle_bug+0x3c/0x60
+ ? exc_invalid_op+0x14/0x70
+ ? asm_exc_invalid_op+0x16/0x20
+ ? devlink_recover_notify.constprop.0+0xb8/0xc0
+ devlink_health_report+0x4a/0x1c0
+ mlx5_fw_reporter_err_work+0xa4/0xd0 [mlx5_core]
+ process_one_work+0x1bb/0x3c0
+ ? process_one_work+0x3c0/0x3c0
+ worker_thread+0x4d/0x3c0
+ ? process_one_work+0x3c0/0x3c0
+ kthread+0xc6/0xf0
+ ? kthread_complete_and_exit+0x20/0x20
+ ret_from_fork+0x1f/0x30
+ </TASK>
+
+Fixes: cf530217408e ("devlink: Notify users when objects are accessible")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-3-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/main.c    | 37 ++++++++++---------
+ .../mellanox/mlx5/core/sf/dev/driver.c        |  1 -
+ 2 files changed, 20 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 6ca91c0e8a6a5..9710ddac1f1a8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1469,6 +1469,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
+       if (err)
+               goto err_register;
++      err = mlx5_crdump_enable(dev);
++      if (err)
++              mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
++
++      err = mlx5_hwmon_dev_register(dev);
++      if (err)
++              mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
++
+       mutex_unlock(&dev->intf_state_mutex);
+       return 0;
+@@ -1494,7 +1502,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
+       int err;
+       devl_lock(devlink);
++      devl_register(devlink);
+       err = mlx5_init_one_devl_locked(dev);
++      if (err)
++              devl_unregister(devlink);
+       devl_unlock(devlink);
+       return err;
+ }
+@@ -1506,6 +1517,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
+       devl_lock(devlink);
+       mutex_lock(&dev->intf_state_mutex);
++      mlx5_hwmon_dev_unregister(dev);
++      mlx5_crdump_disable(dev);
+       mlx5_unregister_device(dev);
+       if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+@@ -1523,6 +1536,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
+       mlx5_function_teardown(dev, true);
+ out:
+       mutex_unlock(&dev->intf_state_mutex);
++      devl_unregister(devlink);
+       devl_unlock(devlink);
+ }
+@@ -1669,16 +1683,20 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
+       }
+       devl_lock(devlink);
++      devl_register(devlink);
++
+       err = mlx5_devlink_params_register(priv_to_devlink(dev));
+-      devl_unlock(devlink);
+       if (err) {
+               mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
+               goto query_hca_caps_err;
+       }
++      devl_unlock(devlink);
+       return 0;
+ query_hca_caps_err:
++      devl_unregister(devlink);
++      devl_unlock(devlink);
+       mlx5_function_disable(dev, true);
+ out:
+       dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+@@ -1691,6 +1709,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
+       devl_lock(devlink);
+       mlx5_devlink_params_unregister(priv_to_devlink(dev));
++      devl_unregister(devlink);
+       devl_unlock(devlink);
+       if (dev->state != MLX5_DEVICE_STATE_UP)
+               return;
+@@ -1932,16 +1951,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+               goto err_init_one;
+       }
+-      err = mlx5_crdump_enable(dev);
+-      if (err)
+-              dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+-
+-      err = mlx5_hwmon_dev_register(dev);
+-      if (err)
+-              mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+-
+       pci_save_state(pdev);
+-      devlink_register(devlink);
+       return 0;
+ err_init_one:
+@@ -1962,16 +1972,9 @@ static void remove_one(struct pci_dev *pdev)
+       struct devlink *devlink = priv_to_devlink(dev);
+       set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+-      /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
+-       * devlink notify APIs.
+-       * Hence, we must drain them before unregistering the devlink.
+-       */
+       mlx5_drain_fw_reset(dev);
+       mlx5_drain_health_wq(dev);
+-      devlink_unregister(devlink);
+       mlx5_sriov_disable(pdev, false);
+-      mlx5_hwmon_dev_unregister(dev);
+-      mlx5_crdump_disable(dev);
+       mlx5_uninit_one(dev);
+       mlx5_pci_close(dev);
+       mlx5_mdev_uninit(dev);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+index 69e270b5aa82d..30218f37d5285 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+@@ -75,7 +75,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
+       devlink = priv_to_devlink(mdev);
+       set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+       mlx5_drain_health_wq(mdev);
+-      devlink_unregister(devlink);
+       if (mlx5_dev_is_lightweight(mdev))
+               mlx5_uninit_one_light(mdev);
+       else
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-sf-stop-waiting-for-fw-as-teardown-was-call.patch b/queue-6.6/net-mlx5-sf-stop-waiting-for-fw-as-teardown-was-call.patch
new file mode 100644 (file)
index 0000000..ad3adf5
--- /dev/null
@@ -0,0 +1,69 @@
+From 0eae8e93240ef66feae0a0b934a068cc38bbd3a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jan 2024 14:24:09 +0200
+Subject: net/mlx5: SF, Stop waiting for FW as teardown was called
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit 137cef6d55564fb687d12fbc5f85be43ff7b53a7 ]
+
+When PF/VF teardown is called the driver sets the flag
+MLX5_BREAK_FW_WAIT to stop waiting for FW loading and initializing. Same
+should be applied to SF driver teardown to cut waiting time. On
+mlx5_sf_dev_remove() set the flag before draining health WQ as recovery
+flow may also wait for FW reloading while it is not relevant anymore.
+
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: c6e77aa9dd82 ("net/mlx5: Register devlink first under devlink lock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/sf/dev/driver.c        | 21 ++++++++++++-------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+index 8fe82f1191bb9..69e270b5aa82d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+@@ -69,24 +69,29 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
+ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
+ {
+       struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+-      struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
++      struct mlx5_core_dev *mdev = sf_dev->mdev;
++      struct devlink *devlink;
+-      mlx5_drain_health_wq(sf_dev->mdev);
++      devlink = priv_to_devlink(mdev);
++      set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
++      mlx5_drain_health_wq(mdev);
+       devlink_unregister(devlink);
+-      if (mlx5_dev_is_lightweight(sf_dev->mdev))
+-              mlx5_uninit_one_light(sf_dev->mdev);
++      if (mlx5_dev_is_lightweight(mdev))
++              mlx5_uninit_one_light(mdev);
+       else
+-              mlx5_uninit_one(sf_dev->mdev);
+-      iounmap(sf_dev->mdev->iseg);
+-      mlx5_mdev_uninit(sf_dev->mdev);
++              mlx5_uninit_one(mdev);
++      iounmap(mdev->iseg);
++      mlx5_mdev_uninit(mdev);
+       mlx5_devlink_free(devlink);
+ }
+ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
+ {
+       struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
++      struct mlx5_core_dev *mdev = sf_dev->mdev;
+-      mlx5_unload_one(sf_dev->mdev, false);
++      set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
++      mlx5_unload_one(mdev, false);
+ }
+ static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-do-not-produce-metadata-freelist-entries-i.patch b/queue-6.6/net-mlx5e-do-not-produce-metadata-freelist-entries-i.patch
new file mode 100644 (file)
index 0000000..dc591aa
--- /dev/null
@@ -0,0 +1,84 @@
+From bcad0d11c6aea7e3de8bb00dedf5d9a85986907c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:17 +0300
+Subject: net/mlx5e: Do not produce metadata freelist entries in Tx port ts WQE
+ xmit
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+[ Upstream commit 86b0ca5b118d3a0bae5e5645a13e66f8a4f6c525 ]
+
+Free Tx port timestamping metadata entries in the NAPI poll context and
+consume metadata entries in the WQE xmit path. Do not free a Tx port
+timestamping metadata entry in the WQE xmit path even in the error path to
+avoid a race between two metadata entry producers.
+
+Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs")
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-10-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h | 8 +++++++-
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c  | 7 +++----
+ 2 files changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+index 7b700d0f956a8..b171cd8f11e04 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
+ }
+ static inline u8
++mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
++{
++      return fifo->data[fifo->mask & fifo->cc];
++}
++
++static inline void
+ mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
+ {
+-      return fifo->data[fifo->mask & fifo->cc++];
++      fifo->cc++;
+ }
+ static inline void
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+index 1ead69c5f5fa3..24cbd44dae93c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
+               u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
++              mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
++
+               mlx5e_skb_cb_hwtstamp_init(skb);
+               mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
+                                          metadata_index);
+@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ err_drop:
+       stats->dropped++;
+-      if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
+-              mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
+-                                           be32_to_cpu(eseg->flow_table_metadata));
+       dev_kfree_skb_any(skb);
+       mlx5e_tx_flush(sq);
+ }
+@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
+ {
+       if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+               eseg->flow_table_metadata =
+-                      cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
++                      cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
+ }
+ static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-fix-mlx5e_priv_init-cleanup-flow.patch b/queue-6.6/net-mlx5e-fix-mlx5e_priv_init-cleanup-flow.patch
new file mode 100644 (file)
index 0000000..935788f
--- /dev/null
@@ -0,0 +1,109 @@
+From ec08a24a24b9a37377ff3a027be4c70862fdacac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:15 +0300
+Subject: net/mlx5e: Fix mlx5e_priv_init() cleanup flow
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit ecb829459a841198e142f72fadab56424ae96519 ]
+
+When mlx5e_priv_init() fails, the cleanup flow calls mlx5e_selq_cleanup which
+calls mlx5e_selq_apply() that assures that the `priv->state_lock` is held using
+lockdep_is_held().
+
+Acquire the state_lock in mlx5e_selq_cleanup().
+
+Kernel log:
+=============================
+WARNING: suspicious RCU usage
+6.8.0-rc3_net_next_841a9b5 #1 Not tainted
+-----------------------------
+drivers/net/ethernet/mellanox/mlx5/core/en/selq.c:124 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+2 locks held by systemd-modules/293:
+ #0: ffffffffa05067b0 (devices_rwsem){++++}-{3:3}, at: ib_register_client+0x109/0x1b0 [ib_core]
+ #1: ffff8881096c65c0 (&device->client_data_rwsem){++++}-{3:3}, at: add_client_context+0x104/0x1c0 [ib_core]
+
+stack backtrace:
+CPU: 4 PID: 293 Comm: systemd-modules Not tainted 6.8.0-rc3_net_next_841a9b5 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x8a/0xa0
+ lockdep_rcu_suspicious+0x154/0x1a0
+ mlx5e_selq_apply+0x94/0xa0 [mlx5_core]
+ mlx5e_selq_cleanup+0x3a/0x60 [mlx5_core]
+ mlx5e_priv_init+0x2be/0x2f0 [mlx5_core]
+ mlx5_rdma_setup_rn+0x7c/0x1a0 [mlx5_core]
+ rdma_init_netdev+0x4e/0x80 [ib_core]
+ ? mlx5_rdma_netdev_free+0x70/0x70 [mlx5_core]
+ ipoib_intf_init+0x64/0x550 [ib_ipoib]
+ ipoib_intf_alloc+0x4e/0xc0 [ib_ipoib]
+ ipoib_add_one+0xb0/0x360 [ib_ipoib]
+ add_client_context+0x112/0x1c0 [ib_core]
+ ib_register_client+0x166/0x1b0 [ib_core]
+ ? 0xffffffffa0573000
+ ipoib_init_module+0xeb/0x1a0 [ib_ipoib]
+ do_one_initcall+0x61/0x250
+ do_init_module+0x8a/0x270
+ init_module_from_file+0x8b/0xd0
+ idempotent_init_module+0x17d/0x230
+ __x64_sys_finit_module+0x61/0xb0
+ do_syscall_64+0x71/0x140
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+ </TASK>
+
+Fixes: 8bf30be75069 ("net/mlx5e: Introduce select queue parameters")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-8-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/selq.c | 2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 --
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+index f675b1926340f..f66bbc8464645 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+ {
++      mutex_lock(selq->state_lock);
+       WARN_ON_ONCE(selq->is_prepared);
+       kvfree(selq->standby);
+@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+       kvfree(selq->standby);
+       selq->standby = NULL;
++      mutex_unlock(selq->state_lock);
+ }
+ void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index c3961c2bbc57c..d49c348f89d28 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5694,9 +5694,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
+       kfree(priv->tx_rates);
+       kfree(priv->txq2sq);
+       destroy_workqueue(priv->wq);
+-      mutex_lock(&priv->state_lock);
+       mlx5e_selq_cleanup(&priv->selq);
+-      mutex_unlock(&priv->state_lock);
+       free_cpumask_var(priv->scratchpad.cpumask);
+       for (i = 0; i < priv->htb_max_qos_sqs; i++)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-htb-fix-inconsistencies-with-qos-sqs-numbe.patch b/queue-6.6/net-mlx5e-htb-fix-inconsistencies-with-qos-sqs-numbe.patch
new file mode 100644 (file)
index 0000000..a176661
--- /dev/null
@@ -0,0 +1,83 @@
+From 6c4a95237b78a4ebdae92cb2f4575620d6b49495 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 22:08:16 +0300
+Subject: net/mlx5e: HTB, Fix inconsistencies with QoS SQs number
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 2f436f1869771d46e1a9f85738d5a1a7c5653a4e ]
+
+When creating a new HTB class while the interface is down,
+the variable that follows the number of QoS SQs (htb_max_qos_sqs)
+may not be consistent with the number of HTB classes.
+
+Previously, we compared these two values to ensure that
+the node_qid is lower than the number of QoS SQs, and we
+allocated stats for that SQ when they are equal.
+
+Change the check to compare the node_qid with the current
+number of leaf nodes and fix the checking conditions to
+ensure allocation of stats_list and stats for each node.
+
+Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://lore.kernel.org/r/20240409190820.227554-9-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en/qos.c  | 33 ++++++++++---------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+index 244bc15a42abf..d9acc37afe1c8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+@@ -82,24 +82,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+       txq_ix = mlx5e_qid_from_qos(chs, node_qid);
+-      WARN_ON(node_qid > priv->htb_max_qos_sqs);
+-      if (node_qid == priv->htb_max_qos_sqs) {
+-              struct mlx5e_sq_stats *stats, **stats_list = NULL;
+-
+-              if (priv->htb_max_qos_sqs == 0) {
+-                      stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+-                                            sizeof(*stats_list),
+-                                            GFP_KERNEL);
+-                      if (!stats_list)
+-                              return -ENOMEM;
+-              }
++      WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
++      if (!priv->htb_qos_sq_stats) {
++              struct mlx5e_sq_stats **stats_list;
++
++              stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
++                                    sizeof(*stats_list), GFP_KERNEL);
++              if (!stats_list)
++                      return -ENOMEM;
++
++              WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
++      }
++
++      if (!priv->htb_qos_sq_stats[node_qid]) {
++              struct mlx5e_sq_stats *stats;
++
+               stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+-              if (!stats) {
+-                      kvfree(stats_list);
++              if (!stats)
+                       return -ENOMEM;
+-              }
+-              if (stats_list)
+-                      WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
++
+               WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+               /* Order htb_max_qos_sqs increment after writing the array pointer.
+                * Pairs with smp_load_acquire in en_stats.c.
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch b/queue-6.6/net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch
new file mode 100644 (file)
index 0000000..3a00095
--- /dev/null
@@ -0,0 +1,60 @@
+From 3189f562e3a6163dafb4984a97f63c1d514d5a7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Apr 2024 22:38:01 +0200
+Subject: net: openvswitch: fix unwanted error log on timeout policy probing
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+[ Upstream commit 4539f91f2a801c0c028c252bffae56030cfb2cae ]
+
+On startup, ovs-vswitchd probes different datapath features including
+support for timeout policies.  While probing, it tries to execute
+certain operations with OVS_PACKET_ATTR_PROBE or OVS_FLOW_ATTR_PROBE
+attributes set.  These attributes tell the openvswitch module to not
+log any errors when they occur as it is expected that some of the
+probes will fail.
+
+For some reason, setting the timeout policy ignores the PROBE attribute
+and logs a failure anyway.  This is causing the following kernel log
+on each re-start of ovs-vswitchd:
+
+  kernel: Failed to associated timeout policy `ovs_test_tp'
+
+Fix that by using the same logging macro that all other messages are
+using.  The message will still be printed at info level when needed
+and will be rate limited, but with a net rate limiter instead of
+generic printk one.
+
+The nf_ct_set_timeout() itself will still print some info messages,
+but at least this change makes logging in openvswitch module more
+consistent.
+
+Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action")
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Acked-by: Eelco Chaudron <echaudro@redhat.com>
+Link: https://lore.kernel.org/r/20240403203803.2137962-1-i.maximets@ovn.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/conntrack.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
+index 3019a4406ca4f..74b63cdb59923 100644
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+       if (ct_info.timeout[0]) {
+               if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
+                                     ct_info.timeout))
+-                      pr_info_ratelimited("Failed to associated timeout "
+-                                          "policy `%s'\n", ct_info.timeout);
++                      OVS_NLERR(log,
++                                "Failed to associated timeout policy '%s'",
++                                ct_info.timeout);
+               else
+                       ct_info.nf_ct_timeout = rcu_dereference(
+                               nf_ct_timeout_find(ct_info.ct)->timeout);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-sparx5-fix-wrong-config-being-used-when-reconfig.patch b/queue-6.6/net-sparx5-fix-wrong-config-being-used-when-reconfig.patch
new file mode 100644 (file)
index 0000000..4464f7b
--- /dev/null
@@ -0,0 +1,47 @@
+From 7b27272df5a59b34fcc3e96cbdf12becdce7e0ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 12:41:59 +0200
+Subject: net: sparx5: fix wrong config being used when reconfiguring PCS
+
+From: Daniel Machon <daniel.machon@microchip.com>
+
+[ Upstream commit 33623113a48ea906f1955cbf71094f6aa4462e8f ]
+
+The wrong port config is being used if the PCS is reconfigured. Fix this
+by correctly using the new config instead of the old one.
+
+Fixes: 946e7fd5053a ("net: sparx5: add port module support")
+Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240409-link-mode-reconfiguration-fix-v2-1-db6a507f3627@microchip.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+index 3a1b1a1f5a195..60dd2fd603a85 100644
+--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
+       bool sgmii = false, inband_aneg = false;
+       int err;
+-      if (port->conf.inband) {
++      if (conf->inband) {
+               if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
+                   conf->portmode == PHY_INTERFACE_MODE_QSGMII)
+                       inband_aneg = true; /* Cisco-SGMII in-band-aneg */
+@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
+       if (err)
+               return -EINVAL;
+-      if (port->conf.inband) {
++      if (conf->inband) {
+               /* Enable/disable 1G counters in ASM */
+               spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
+                        ASM_PORT_CFG_CSC_STAT_DIS,
+-- 
+2.43.0
+
diff --git a/queue-6.6/netfilter-complete-validation-of-user-input.patch b/queue-6.6/netfilter-complete-validation-of-user-input.patch
new file mode 100644 (file)
index 0000000..fca91a6
--- /dev/null
@@ -0,0 +1,102 @@
+From e6d38f8383d6de74284a9163679e3df5cca0c7e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 12:07:41 +0000
+Subject: netfilter: complete validation of user input
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 65acf6e0501ac8880a4f73980d01b5d27648b956 ]
+
+In my recent commit, I missed that do_replace() handlers
+use copy_from_sockptr() (which I fixed), followed
+by unsafe copy_from_sockptr_offset() calls.
+
+In all functions, we can perform the @optlen validation
+before even calling xt_alloc_table_info() with the following
+check:
+
+if ((u64)optlen < (u64)tmp.size + sizeof(tmp))
+        return -EINVAL;
+
+Fixes: 0c83842df40f ("netfilter: validate user input for expected length")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Link: https://lore.kernel.org/r/20240409120741.3538135-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/netfilter/arp_tables.c | 4 ++++
+ net/ipv4/netfilter/ip_tables.c  | 4 ++++
+ net/ipv6/netfilter/ip6_tables.c | 4 ++++
+ 3 files changed, 12 insertions(+)
+
+diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
+index b150c9929b12e..14365b20f1c5c 100644
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+@@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
+index 4876707595781..fe89a056eb06c 100644
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -1118,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+@@ -1504,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
+index 636b360311c53..131f7bb2110d3 100644
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -1135,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+@@ -1513,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+               return -ENOMEM;
+       if (tmp.num_counters == 0)
+               return -EINVAL;
++      if ((u64)len < (u64)tmp.size + sizeof(tmp))
++              return -EINVAL;
+       tmp.name[sizeof(tmp.name)-1] = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.6/nouveau-fix-function-cast-warning.patch b/queue-6.6/nouveau-fix-function-cast-warning.patch
new file mode 100644 (file)
index 0000000..0ad84a0
--- /dev/null
@@ -0,0 +1,51 @@
+From be70f7f7f6b5e67d42c015104200085fc0a9f3a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 18:02:25 +0200
+Subject: nouveau: fix function cast warning
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 185fdb4697cc9684a02f2fab0530ecdd0c2f15d4 ]
+
+Calling a function through an incompatible pointer type breaks kcfi,
+so clang warns about the assignment:
+
+drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c:73:10: error: cast from 'void (*)(const void *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict]
+   73 |         .fini = (void(*)(void *))kfree,
+
+Avoid this with a trivial wrapper.
+
+Fixes: c39f472e9f14 ("drm/nouveau: remove symlinks, move core/ to nvkm/ (no code changes)")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Danilo Krummrich <dakr@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240404160234.2923554-1-arnd@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+index 4bf486b571013..cb05f7f48a98b 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
+       return ERR_PTR(-EINVAL);
+ }
++static void of_fini(void *p)
++{
++      kfree(p);
++}
++
+ const struct nvbios_source
+ nvbios_of = {
+       .name = "OpenFirmware",
+       .init = of_init,
+-      .fini = (void(*)(void *))kfree,
++      .fini = of_fini,
+       .read = of_read,
+       .size = of_size,
+       .rw = false,
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-af-fix-nix-sq-mode-and-bp-config.patch b/queue-6.6/octeontx2-af-fix-nix-sq-mode-and-bp-config.patch
new file mode 100644 (file)
index 0000000..b218c45
--- /dev/null
@@ -0,0 +1,59 @@
+From aba7281a7d953edbf4d1eb72230b0e104d75102a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Apr 2024 12:06:43 +0530
+Subject: octeontx2-af: Fix NIX SQ mode and BP config
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit faf23006185e777db18912685922c5ddb2df383f ]
+
+NIX SQ mode and link backpressure configuration are required for
+all platforms. But in the current driver this code is wrongly placed
+under a specific platform check. This patch fixes the issue by
+moving the code out of the platform check.
+
+Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Link: https://lore.kernel.org/r/20240408063643.26288-1-gakula@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 20 +++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 58744313f0eb6..f6f6d7c04e8bf 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -4465,18 +4465,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
+                */
+               rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+                           rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
++      }
+-              /* Set chan/link to backpressure TL3 instead of TL2 */
+-              rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
++      /* Set chan/link to backpressure TL3 instead of TL2 */
++      rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+-              /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+-               * This sticky mode is known to cause SQ stalls when multiple
+-               * SQs are mapped to same SMQ and transmitting pkts at a time.
+-               */
+-              cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+-              cfg &= ~BIT_ULL(15);
+-              rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
+-      }
++      /* Disable SQ manager's sticky mode operation (set TM6 = 0)
++       * This sticky mode is known to cause SQ stalls when multiple
++       * SQs are mapped to same SMQ and transmitting pkts at a time.
++       */
++      cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
++      cfg &= ~BIT_ULL(15);
++      rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
+       ltdefs = rvu->kpu.lt_def;
+       /* Calibrate X2P bus to check if CGX/LBK links are fine */
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-pf-fix-transmit-scheduler-resource-leak.patch b/queue-6.6/octeontx2-pf-fix-transmit-scheduler-resource-leak.patch
new file mode 100644 (file)
index 0000000..15765e3
--- /dev/null
@@ -0,0 +1,41 @@
+From dabc19e7cbb134803c12deb8e0129a1bd5d9335c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 16:54:27 +0530
+Subject: octeontx2-pf: Fix transmit scheduler resource leak
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit bccb798e07f8bb8b91212fe8ed1e421685449076 ]
+
+In order to support shaping and scheduling, upon class creation
+the netdev driver allocates transmit schedulers.
+
+The previous patch which added support for Round robin scheduling has
+a bug due to which driver is not freeing transmit schedulers post
+class deletion.
+
+This patch fixes the same.
+
+Fixes: 47a9656f168a ("octeontx2-pf: htb offload support for Round Robin scheduling")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+index 1e77bbf5d22a1..1723e9912ae07 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
+               otx2_qos_read_txschq_cfg_tl(node, cfg);
+               cnt = cfg->static_node_pos[node->level];
+               cfg->schq_contig_list[node->level][cnt] = node->schq;
++              cfg->schq_index_used[node->level][cnt] = true;
+               cfg->schq_contig[node->level]++;
+               cfg->static_node_pos[node->level]++;
+               otx2_qos_read_txschq_cfg_schq(node, cfg);
+-- 
+2.43.0
+
diff --git a/queue-6.6/revert-drm-qxl-simplify-qxl_fence_wait.patch b/queue-6.6/revert-drm-qxl-simplify-qxl_fence_wait.patch
new file mode 100644 (file)
index 0000000..cd0b05d
--- /dev/null
@@ -0,0 +1,115 @@
+From 1aa0301fd32440f33c1d86ccb9ecee0d95cbee68 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 19:14:48 +0100
+Subject: Revert "drm/qxl: simplify qxl_fence_wait"
+
+From: Alex Constantino <dreaming.about.electric.sheep@gmail.com>
+
+[ Upstream commit 07ed11afb68d94eadd4ffc082b97c2331307c5ea ]
+
+This reverts commit 5a838e5d5825c85556011478abde708251cc0776.
+
+Changes from commit 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait") would
+result in a '[TTM] Buffer eviction failed' exception whenever it reached a
+timeout.
+Due to a dependency on DMA_FENCE_WARN this also restores some code deleted
+by commit d72277b6c37d ("dma-buf: nuke DMA_FENCE_TRACE macros v2").
+
+Fixes: 5a838e5d5825 ("drm/qxl: simplify qxl_fence_wait")
+Link: https://lore.kernel.org/regressions/ZTgydqRlK6WX_b29@eldamar.lan/
+Reported-by: Timo Lindfors <timo.lindfors@iki.fi>
+Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1054514
+Signed-off-by: Alex Constantino <dreaming.about.electric.sheep@gmail.com>
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240404181448.1643-2-dreaming.about.electric.sheep@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/qxl/qxl_release.c | 50 +++++++++++++++++++++++++++----
+ include/linux/dma-fence.h         |  7 +++++
+ 2 files changed, 52 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
+index 368d26da0d6a2..9febc8b73f09e 100644
+--- a/drivers/gpu/drm/qxl/qxl_release.c
++++ b/drivers/gpu/drm/qxl/qxl_release.c
+@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
+                          signed long timeout)
+ {
+       struct qxl_device *qdev;
++      struct qxl_release *release;
++      int count = 0, sc = 0;
++      bool have_drawable_releases;
+       unsigned long cur, end = jiffies + timeout;
+       qdev = container_of(fence->lock, struct qxl_device, release_lock);
++      release = container_of(fence, struct qxl_release, base);
++      have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
+-      if (!wait_event_timeout(qdev->release_event,
+-                              (dma_fence_is_signaled(fence) ||
+-                               (qxl_io_notify_oom(qdev), 0)),
+-                              timeout))
+-              return 0;
++retry:
++      sc++;
++
++      if (dma_fence_is_signaled(fence))
++              goto signaled;
++
++      qxl_io_notify_oom(qdev);
++
++      for (count = 0; count < 11; count++) {
++              if (!qxl_queue_garbage_collect(qdev, true))
++                      break;
++
++              if (dma_fence_is_signaled(fence))
++                      goto signaled;
++      }
++
++      if (dma_fence_is_signaled(fence))
++              goto signaled;
++
++      if (have_drawable_releases || sc < 4) {
++              if (sc > 2)
++                      /* back off */
++                      usleep_range(500, 1000);
++
++              if (time_after(jiffies, end))
++                      return 0;
++
++              if (have_drawable_releases && sc > 300) {
++                      DMA_FENCE_WARN(fence,
++                                     "failed to wait on release %llu after spincount %d\n",
++                                     fence->context & ~0xf0000000, sc);
++                      goto signaled;
++              }
++              goto retry;
++      }
++      /*
++       * yeah, original sync_obj_wait gave up after 3 spins when
++       * have_drawable_releases is not set.
++       */
++signaled:
+       cur = jiffies;
+       if (time_after(cur, end))
+               return 0;
+diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
+index b3772edca2e6e..fd4a823ce3cdb 100644
+--- a/include/linux/dma-fence.h
++++ b/include/linux/dma-fence.h
+@@ -681,4 +681,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
+       return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
+ }
++#define DMA_FENCE_WARN(f, fmt, args...) \
++      do {                                                            \
++              struct dma_fence *__ff = (f);                           \
++              pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
++                       ##args);                                       \
++      } while (0)
++
+ #endif /* __LINUX_DMA_FENCE_H */
+-- 
+2.43.0
+
diff --git a/queue-6.6/revert-s390-ism-fix-receive-message-buffer-allocatio.patch b/queue-6.6/revert-s390-ism-fix-receive-message-buffer-allocatio.patch
new file mode 100644 (file)
index 0000000..c41c4ac
--- /dev/null
@@ -0,0 +1,94 @@
+From 24359c768c89d3176bf7dcbcdcc4d3111846191c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 13:37:53 +0200
+Subject: Revert "s390/ism: fix receive message buffer allocation"
+
+From: Gerd Bayer <gbayer@linux.ibm.com>
+
+[ Upstream commit d51dc8dd6ab6f93a894ff8b38d3b8d02c98eb9fb ]
+
+This reverts commit 58effa3476536215530c9ec4910ffc981613b413.
+Review was not finished on this patch. So it's not ready for
+upstreaming.
+
+Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
+Link: https://lore.kernel.org/r/20240409113753.2181368-1-gbayer@linux.ibm.com
+Fixes: 58effa347653 ("s390/ism: fix receive message buffer allocation")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 38 +++++++++-----------------------------
+ 1 file changed, 9 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index 477d7b003c3f0..81aabbfbbe2ca 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -14,8 +14,6 @@
+ #include <linux/err.h>
+ #include <linux/ctype.h>
+ #include <linux/processor.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/mm.h>
+ #include "ism.h"
+@@ -293,15 +291,13 @@ static int ism_read_local_gid(struct ism_dev *ism)
+ static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+ {
+       clear_bit(dmb->sba_idx, ism->sba_bitmap);
+-      dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+-                     DMA_FROM_DEVICE);
+-      folio_put(virt_to_folio(dmb->cpu_addr));
++      dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
++                        dmb->cpu_addr, dmb->dma_addr);
+ }
+ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+ {
+       unsigned long bit;
+-      int rc;
+       if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
+               return -EINVAL;
+@@ -318,30 +314,14 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+           test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
+               return -EINVAL;
+-      dmb->cpu_addr =
+-              folio_address(folio_alloc(GFP_KERNEL | __GFP_NOWARN |
+-                                        __GFP_NOMEMALLOC | __GFP_NORETRY,
+-                                        get_order(dmb->dmb_len)));
++      dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
++                                         &dmb->dma_addr,
++                                         GFP_KERNEL | __GFP_NOWARN |
++                                         __GFP_NOMEMALLOC | __GFP_NORETRY);
++      if (!dmb->cpu_addr)
++              clear_bit(dmb->sba_idx, ism->sba_bitmap);
+-      if (!dmb->cpu_addr) {
+-              rc = -ENOMEM;
+-              goto out_bit;
+-      }
+-      dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+-                                   virt_to_page(dmb->cpu_addr), 0,
+-                                   dmb->dmb_len, DMA_FROM_DEVICE);
+-      if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+-              rc = -ENOMEM;
+-              goto out_free;
+-      }
+-
+-      return 0;
+-
+-out_free:
+-      kfree(dmb->cpu_addr);
+-out_bit:
+-      clear_bit(dmb->sba_idx, ism->sba_bitmap);
+-      return rc;
++      return dmb->cpu_addr ? 0 : -ENOMEM;
+ }
+ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
+-- 
+2.43.0
+
diff --git a/queue-6.6/s390-ism-fix-receive-message-buffer-allocation.patch b/queue-6.6/s390-ism-fix-receive-message-buffer-allocation.patch
new file mode 100644 (file)
index 0000000..0ee270d
--- /dev/null
@@ -0,0 +1,110 @@
+From 2863f58d750f29312a75cd9c9436712e13137322 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 13:16:06 +0200
+Subject: s390/ism: fix receive message buffer allocation
+
+From: Gerd Bayer <gbayer@linux.ibm.com>
+
+[ Upstream commit 58effa3476536215530c9ec4910ffc981613b413 ]
+
+Since [1], dma_alloc_coherent() does not accept requests for GFP_COMP
+anymore, even on archs that may be able to fulfill this. Functionality that
+relied on the receive buffer being a compound page broke at that point:
+The SMC-D protocol, that utilizes the ism device driver, passes receive
+buffers to the splice processor in a struct splice_pipe_desc with a
+single entry list of struct pages. As the buffer is no longer a compound
+page, the splice processor now rejects requests to handle more than a
+page worth of data.
+
+Replace dma_alloc_coherent() and allocate a buffer with folio_alloc and
+create a DMA map for it with dma_map_page(). Since only receive buffers
+on ISM devices use DMA, qualify the mapping as FROM_DEVICE.
+Since ISM devices are available on arch s390 only, and on that arch all
+DMA is coherent, there is no need to introduce and export some kind of
+dma_sync_to_cpu() method to be called by the SMC-D protocol layer.
+
+Analogously, replace dma_free_coherent by a two step dma_unmap_page,
+then folio_put to free the receive buffer.
+
+[1] https://lore.kernel.org/all/20221113163535.884299-1-hch@lst.de/
+
+Fixes: c08004eede4b ("s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent")
+Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 38 +++++++++++++++++++++++++++++---------
+ 1 file changed, 29 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index 81aabbfbbe2ca..477d7b003c3f0 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -14,6 +14,8 @@
+ #include <linux/err.h>
+ #include <linux/ctype.h>
+ #include <linux/processor.h>
++#include <linux/dma-mapping.h>
++#include <linux/mm.h>
+ #include "ism.h"
+@@ -291,13 +293,15 @@ static int ism_read_local_gid(struct ism_dev *ism)
+ static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+ {
+       clear_bit(dmb->sba_idx, ism->sba_bitmap);
+-      dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
+-                        dmb->cpu_addr, dmb->dma_addr);
++      dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
++                     DMA_FROM_DEVICE);
++      folio_put(virt_to_folio(dmb->cpu_addr));
+ }
+ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+ {
+       unsigned long bit;
++      int rc;
+       if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
+               return -EINVAL;
+@@ -314,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
+           test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
+               return -EINVAL;
+-      dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
+-                                         &dmb->dma_addr,
+-                                         GFP_KERNEL | __GFP_NOWARN |
+-                                         __GFP_NOMEMALLOC | __GFP_NORETRY);
+-      if (!dmb->cpu_addr)
+-              clear_bit(dmb->sba_idx, ism->sba_bitmap);
++      dmb->cpu_addr =
++              folio_address(folio_alloc(GFP_KERNEL | __GFP_NOWARN |
++                                        __GFP_NOMEMALLOC | __GFP_NORETRY,
++                                        get_order(dmb->dmb_len)));
+-      return dmb->cpu_addr ? 0 : -ENOMEM;
++      if (!dmb->cpu_addr) {
++              rc = -ENOMEM;
++              goto out_bit;
++      }
++      dmb->dma_addr = dma_map_page(&ism->pdev->dev,
++                                   virt_to_page(dmb->cpu_addr), 0,
++                                   dmb->dmb_len, DMA_FROM_DEVICE);
++      if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
++              rc = -ENOMEM;
++              goto out_free;
++      }
++
++      return 0;
++
++out_free:
++      kfree(dmb->cpu_addr);
++out_bit:
++      clear_bit(dmb->sba_idx, ism->sba_bitmap);
++      return rc;
+ }
+ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-hisi_sas-modify-the-deadline-for-ata_wait_after.patch b/queue-6.6/scsi-hisi_sas-modify-the-deadline-for-ata_wait_after.patch
new file mode 100644 (file)
index 0000000..db1df47
--- /dev/null
@@ -0,0 +1,43 @@
+From 5f30307dc246484c8973d309cf9c1a6cb328ecf7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Apr 2024 11:55:13 +0800
+Subject: scsi: hisi_sas: Modify the deadline for ata_wait_after_reset()
+
+From: Xiang Chen <chenxiang66@hisilicon.com>
+
+[ Upstream commit 0098c55e0881f0b32591f2110410d5c8b7f9bd5a ]
+
+We found that the second parameter of function ata_wait_after_reset() is
+incorrectly used. We call smp_ata_check_ready_type() to poll the device
+type until the 30s timeout, so the correct deadline should be (jiffies +
+30000).
+
+Fixes: 3c2673a09cf1 ("scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset")
+Co-developed-by: xiabing <xiabing12@h-partners.com>
+Signed-off-by: xiabing <xiabing12@h-partners.com>
+Co-developed-by: Yihang Li <liyihang9@huawei.com>
+Signed-off-by: Yihang Li <liyihang9@huawei.com>
+Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
+Link: https://lore.kernel.org/r/20240402035513.2024241-3-chenxiang66@hisilicon.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
+index b155ac800979c..e4363b8c6ad26 100644
+--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
++++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
+@@ -1792,7 +1792,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
+       if (dev_is_sata(device)) {
+               struct ata_link *link = &device->sata_dev.ap->link;
+-              rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
++              rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
+                                         smp_ata_check_ready_type);
+       } else {
+               msleep(2000);
+-- 
+2.43.0
+
diff --git a/queue-6.6/scsi-qla2xxx-fix-off-by-one-in-qla_edif_app_getstats.patch b/queue-6.6/scsi-qla2xxx-fix-off-by-one-in-qla_edif_app_getstats.patch
new file mode 100644 (file)
index 0000000..503369e
--- /dev/null
@@ -0,0 +1,39 @@
+From 5ba66c2ea6876f93a365c2569cdafaeb5e28a3bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Apr 2024 12:56:54 +0300
+Subject: scsi: qla2xxx: Fix off by one in qla_edif_app_getstats()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 4406e4176f47177f5e51b4cc7e6a7a2ff3dbfbbd ]
+
+The app_reply->elem[] array is allocated earlier in this function and it
+has app_req.num_ports elements.  Thus this > comparison needs to be >= to
+prevent memory corruption.
+
+Fixes: 7878f22a2e03 ("scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/5c125b2f-92dd-412b-9b6f-fc3a3207bd60@moroto.mountain
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_edif.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
+index 26e6b3e3af431..dcde55c8ee5de 100644
+--- a/drivers/scsi/qla2xxx/qla_edif.c
++++ b/drivers/scsi/qla2xxx/qla_edif.c
+@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
+               list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+                       if (fcport->edif.enable) {
+-                              if (pcnt > app_req.num_ports)
++                              if (pcnt >= app_req.num_ports)
+                                       break;
+                               app_reply->elem[pcnt].rekey_count =
+-- 
+2.43.0
+
index 74aa647d23667cff84e1d96f985609ab67994e84..b7b7149da93f191a9541242e92dbe8bae6728df6 100644 (file)
@@ -12,3 +12,67 @@ platform-chrome-cros_ec_uart-properly-fix-race-condition.patch
 acpi-scan-do-not-increase-dep_unmet-for-already-met-dependencies.patch
 pm-s2idle-make-sure-cpus-will-wakeup-directly-on-resume.patch
 media-cec-core-remove-length-check-of-timer-status.patch
+arm-omap2-fix-bogus-mmc-gpio-labels-on-nokia-n8x0.patch
+arm-omap2-fix-n810-mmc-gpiod-table.patch
+mmc-omap-fix-broken-slot-switch-lookup.patch
+mmc-omap-fix-deferred-probe.patch
+mmc-omap-restore-original-power-up-down-steps.patch
+arm-omap2-fix-usb-regression-on-nokia-n8x0.patch
+firmware-arm_scmi-make-raw-debugfs-entries-non-seeka.patch
+cxl-mem-fix-for-the-index-of-clear-event-record-hand.patch
+cxl-core-regs-fix-usage-of-map-reg_type-in-cxl_decod.patch
+drm-msm-dpu-don-t-allow-overriding-data-from-catalog.patch
+arm64-dts-imx8-ss-conn-fix-usdhc-wrong-lpcg-clock-or.patch
+cxl-core-fix-initialization-of-mbox_cmd.size_out-in-.patch
+revert-drm-qxl-simplify-qxl_fence_wait.patch
+nouveau-fix-function-cast-warning.patch
+scsi-hisi_sas-modify-the-deadline-for-ata_wait_after.patch
+scsi-qla2xxx-fix-off-by-one-in-qla_edif_app_getstats.patch
+net-openvswitch-fix-unwanted-error-log-on-timeout-po.patch
+u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch
+xsk-validate-user-input-for-xdp_-umem-completion-_fi.patch
+octeontx2-pf-fix-transmit-scheduler-resource-leak.patch
+block-fix-q-blkg_list-corruption-during-disk-rebind.patch
+geneve-fix-header-validation-in-geneve-6-_xmit_skb.patch
+s390-ism-fix-receive-message-buffer-allocation.patch
+bnxt_en-fix-possible-memory-leak-in-bnxt_rdma_aux_de.patch
+bnxt_en-fix-error-recovery-for-roce-ulp-client.patch
+bnxt_en-reset-ptp-tx_avail-after-possible-firmware-r.patch
+net-ks8851-inline-ks8851_rx_skb.patch
+net-ks8851-handle-softirqs-at-the-end-of-irq-thread-.patch
+af_unix-clear-stale-u-oob_skb.patch
+octeontx2-af-fix-nix-sq-mode-and-bp-config.patch
+ipv6-fib-hide-unused-pn-variable.patch
+ipv4-route-avoid-unused-but-set-variable-warning.patch
+ipv6-fix-race-condition-between-ipv6_get_ifaddr-and-.patch
+bluetooth-iso-align-broadcast-sync_timeout-with-conn.patch
+bluetooth-iso-don-t-reject-bt_iso_qos-if-parameters-.patch
+bluetooth-hci_sync-use-qos-to-determine-which-phy-to.patch
+bluetooth-hci_sync-fix-using-the-same-interval-and-w.patch
+bluetooth-sco-fix-not-validating-setsockopt-user-inp.patch
+bluetooth-l2cap-don-t-double-set-the-hci_conn_mgmt_c.patch
+netfilter-complete-validation-of-user-input.patch
+net-mlx5-sf-stop-waiting-for-fw-as-teardown-was-call.patch
+net-mlx5-register-devlink-first-under-devlink-lock.patch
+net-mlx5-offset-comp-irq-index-in-name-by-one.patch
+net-mlx5-properly-link-new-fs-rules-into-the-tree.patch
+net-mlx5-correctly-compare-pkt-reformat-ids.patch
+net-mlx5e-fix-mlx5e_priv_init-cleanup-flow.patch
+net-mlx5e-htb-fix-inconsistencies-with-qos-sqs-numbe.patch
+net-mlx5e-do-not-produce-metadata-freelist-entries-i.patch
+net-sparx5-fix-wrong-config-being-used-when-reconfig.patch
+revert-s390-ism-fix-receive-message-buffer-allocatio.patch
+net-dsa-mt7530-trap-link-local-frames-regardless-of-.patch
+af_unix-do-not-use-atomic-ops-for-unix_sk-sk-infligh.patch
+af_unix-fix-garbage-collector-racing-against-connect.patch
+net-ena-fix-potential-sign-extension-issue.patch
+net-ena-wrong-missing-io-completions-check-order.patch
+net-ena-fix-incorrect-descriptor-free-behavior.patch
+net-ena-move-xdp-code-to-its-new-files.patch
+net-ena-pass-ena_adapter-instead-of-net_device-to-en.patch
+net-ena-use-tx_ring-instead-of-xdp_ring-for-xdp-chan.patch
+net-ena-set-tx_info-xdpf-value-to-null.patch
+tracing-fix-ftrace_record_recursion_size-kconfig-ent.patch
+tracing-hide-unused-ftrace_event_id_fops.patch
+iommu-vt-d-fix-wrong-use-of-pasid-config.patch
+iommu-vt-d-allocate-local-memory-for-page-request-qu.patch
diff --git a/queue-6.6/tracing-fix-ftrace_record_recursion_size-kconfig-ent.patch b/queue-6.6/tracing-fix-ftrace_record_recursion_size-kconfig-ent.patch
new file mode 100644 (file)
index 0000000..2d5501c
--- /dev/null
@@ -0,0 +1,42 @@
+From 2258268f9b9c6d58cb1afcf419a659cc4152e8a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Mar 2024 17:48:01 +0530
+Subject: tracing: Fix FTRACE_RECORD_RECURSION_SIZE Kconfig entry
+
+From: Prasad Pandit <pjp@fedoraproject.org>
+
+[ Upstream commit d96c36004e31e2baaf8ea1b449b7d0b2c2bfb41a ]
+
+Fix the FTRACE_RECORD_RECURSION_SIZE entry: replace the tab with
+a space character. This helps Kconfig parsers read the file
+without error.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240322121801.1803948-1-ppandit@redhat.com
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 773c16705058 ("ftrace: Add recording of functions that caused recursion")
+Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
+index 61c541c36596d..47345bf1d4a9f 100644
+--- a/kernel/trace/Kconfig
++++ b/kernel/trace/Kconfig
+@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
+ config FTRACE_RECORD_RECURSION_SIZE
+       int "Max number of recursed functions to record"
+-      default 128
++      default 128
+       depends on FTRACE_RECORD_RECURSION
+       help
+         This defines the limit of number of functions that can be
+-- 
+2.43.0
+
diff --git a/queue-6.6/tracing-hide-unused-ftrace_event_id_fops.patch b/queue-6.6/tracing-hide-unused-ftrace_event_id_fops.patch
new file mode 100644 (file)
index 0000000..f859bef
--- /dev/null
@@ -0,0 +1,76 @@
+From fa7a7f04958a546ece5c48f37071ac5ce539c221 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Apr 2024 10:06:24 +0200
+Subject: tracing: hide unused ftrace_event_id_fops
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 5281ec83454d70d98b71f1836fb16512566c01cd ]
+
+When CONFIG_PERF_EVENTS is disabled, a 'make W=1' build produces a warning about the
+unused ftrace_event_id_fops variable:
+
+kernel/trace/trace_events.c:2155:37: error: 'ftrace_event_id_fops' defined but not used [-Werror=unused-const-variable=]
+ 2155 | static const struct file_operations ftrace_event_id_fops = {
+
+Hide this in the same #ifdef as the reference to it.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240403080702.3509288-7-arnd@kernel.org
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Jinjie Ruan <ruanjinjie@huawei.com>
+Cc: Clément Léger <cleger@rivosinc.com>
+Cc: Dan Carpenter <dan.carpenter@linaro.org>
+Cc: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
+Fixes: 620a30e97feb ("tracing: Don't pass file_operations array to event_create_dir()")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 941a394d39118..99f1308122866 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -1668,6 +1668,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
+       return 0;
+ }
++#ifdef CONFIG_PERF_EVENTS
+ static ssize_t
+ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+ {
+@@ -1682,6 +1683,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
+ }
++#endif
+ static ssize_t
+ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
+@@ -2126,10 +2128,12 @@ static const struct file_operations ftrace_event_format_fops = {
+       .release = seq_release,
+ };
++#ifdef CONFIG_PERF_EVENTS
+ static const struct file_operations ftrace_event_id_fops = {
+       .read = event_id_read,
+       .llseek = default_llseek,
+ };
++#endif
+ static const struct file_operations ftrace_event_filter_fops = {
+       .open = tracing_open_file_tr,
+-- 
+2.43.0
+
diff --git a/queue-6.6/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch b/queue-6.6/u64_stats-fix-u64_stats_init-for-lockdep-when-used-r.patch
new file mode 100644 (file)
index 0000000..fa30d9b
--- /dev/null
@@ -0,0 +1,56 @@
+From f55ddb6fdc2f588c0b7c262d4d97f4264ffa30aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 09:57:40 +0200
+Subject: u64_stats: fix u64_stats_init() for lockdep when used repeatedly in
+ one file
+
+From: Petr Tesarik <petr@tesarici.cz>
+
+[ Upstream commit 38a15d0a50e0a43778561a5861403851f0b0194c ]
+
+Fix bogus lockdep warnings if multiple u64_stats_sync variables are
+initialized in the same file.
+
+With CONFIG_LOCKDEP, seqcount_init() is a macro which declares:
+
+       static struct lock_class_key __key;
+
+Since u64_stats_init() is a function (albeit an inline one), all calls
+within the same file end up using the same instance, effectively treating
+them all as a single lock-class.
+
+Fixes: 9464ca650008 ("net: make u64_stats_init() a function")
+Closes: https://lore.kernel.org/netdev/ea1567d9-ce66-45e6-8168-ac40a47d1821@roeck-us.net/
+Signed-off-by: Petr Tesarik <petr@tesarici.cz>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240404075740.30682-1-petr@tesarici.cz
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/u64_stats_sync.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
+index ffe48e69b3f3a..457879938fc19 100644
+--- a/include/linux/u64_stats_sync.h
++++ b/include/linux/u64_stats_sync.h
+@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
+       p->v++;
+ }
+-static inline void u64_stats_init(struct u64_stats_sync *syncp)
+-{
+-      seqcount_init(&syncp->seq);
+-}
++#define u64_stats_init(syncp)                         \
++      do {                                            \
++              struct u64_stats_sync *__s = (syncp);   \
++              seqcount_init(&__s->seq);               \
++      } while (0)
+ static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
+ {
+-- 
+2.43.0
+
diff --git a/queue-6.6/xsk-validate-user-input-for-xdp_-umem-completion-_fi.patch b/queue-6.6/xsk-validate-user-input-for-xdp_-umem-completion-_fi.patch
new file mode 100644 (file)
index 0000000..ff89371
--- /dev/null
@@ -0,0 +1,176 @@
+From 86cd2ac2face5ad70967e9f1e3fd8e4e7f6b38ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Apr 2024 20:27:38 +0000
+Subject: xsk: validate user input for XDP_{UMEM|COMPLETION}_FILL_RING
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 237f3cf13b20db183d3706d997eedc3c49eacd44 ]
+
+syzbot reported an illegal copy in xsk_setsockopt() [1]
+
+Make sure to validate setsockopt() @optlen parameter.
+
+[1]
+
+ BUG: KASAN: slab-out-of-bounds in copy_from_sockptr_offset include/linux/sockptr.h:49 [inline]
+ BUG: KASAN: slab-out-of-bounds in copy_from_sockptr include/linux/sockptr.h:55 [inline]
+ BUG: KASAN: slab-out-of-bounds in xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420
+Read of size 4 at addr ffff888028c6cde3 by task syz-executor.0/7549
+
+CPU: 0 PID: 7549 Comm: syz-executor.0 Not tainted 6.8.0-syzkaller-08951-gfe46a7dd189e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
+Call Trace:
+ <TASK>
+  __dump_stack lib/dump_stack.c:88 [inline]
+  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
+  print_address_description mm/kasan/report.c:377 [inline]
+  print_report+0x169/0x550 mm/kasan/report.c:488
+  kasan_report+0x143/0x180 mm/kasan/report.c:601
+  copy_from_sockptr_offset include/linux/sockptr.h:49 [inline]
+  copy_from_sockptr include/linux/sockptr.h:55 [inline]
+  xsk_setsockopt+0x909/0xa40 net/xdp/xsk.c:1420
+  do_sock_setsockopt+0x3af/0x720 net/socket.c:2311
+  __sys_setsockopt+0x1ae/0x250 net/socket.c:2334
+  __do_sys_setsockopt net/socket.c:2343 [inline]
+  __se_sys_setsockopt net/socket.c:2340 [inline]
+  __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
+ do_syscall_64+0xfb/0x240
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+RIP: 0033:0x7fb40587de69
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007fb40665a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 00007fb4059abf80 RCX: 00007fb40587de69
+RDX: 0000000000000005 RSI: 000000000000011b RDI: 0000000000000006
+RBP: 00007fb4058ca47a R08: 0000000000000002 R09: 0000000000000000
+R10: 0000000020001980 R11: 0000000000000246 R12: 0000000000000000
+R13: 000000000000000b R14: 00007fb4059abf80 R15: 00007fff57ee4d08
+ </TASK>
+
+Allocated by task 7549:
+  kasan_save_stack mm/kasan/common.c:47 [inline]
+  kasan_save_track+0x3f/0x80 mm/kasan/common.c:68
+  poison_kmalloc_redzone mm/kasan/common.c:370 [inline]
+  __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387
+  kasan_kmalloc include/linux/kasan.h:211 [inline]
+  __do_kmalloc_node mm/slub.c:3966 [inline]
+  __kmalloc+0x233/0x4a0 mm/slub.c:3979
+  kmalloc include/linux/slab.h:632 [inline]
+  __cgroup_bpf_run_filter_setsockopt+0xd2f/0x1040 kernel/bpf/cgroup.c:1869
+  do_sock_setsockopt+0x6b4/0x720 net/socket.c:2293
+  __sys_setsockopt+0x1ae/0x250 net/socket.c:2334
+  __do_sys_setsockopt net/socket.c:2343 [inline]
+  __se_sys_setsockopt net/socket.c:2340 [inline]
+  __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
+ do_syscall_64+0xfb/0x240
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+The buggy address belongs to the object at ffff888028c6cde0
+ which belongs to the cache kmalloc-8 of size 8
+The buggy address is located 1 bytes to the right of
+ allocated 2-byte region [ffff888028c6cde0, ffff888028c6cde2)
+
+The buggy address belongs to the physical page:
+page:ffffea0000a31b00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888028c6c9c0 pfn:0x28c6c
+anon flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff)
+page_type: 0xffffffff()
+raw: 00fff00000000800 ffff888014c41280 0000000000000000 dead000000000001
+raw: ffff888028c6c9c0 0000000080800057 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112cc0(GFP_USER|__GFP_NOWARN|__GFP_NORETRY), pid 6648, tgid 6644 (syz-executor.0), ts 133906047828, free_ts 133859922223
+  set_page_owner include/linux/page_owner.h:31 [inline]
+  post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533
+  prep_new_page mm/page_alloc.c:1540 [inline]
+  get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311
+  __alloc_pages+0x256/0x680 mm/page_alloc.c:4569
+  __alloc_pages_node include/linux/gfp.h:238 [inline]
+  alloc_pages_node include/linux/gfp.h:261 [inline]
+  alloc_slab_page+0x5f/0x160 mm/slub.c:2175
+  allocate_slab mm/slub.c:2338 [inline]
+  new_slab+0x84/0x2f0 mm/slub.c:2391
+  ___slab_alloc+0xc73/0x1260 mm/slub.c:3525
+  __slab_alloc mm/slub.c:3610 [inline]
+  __slab_alloc_node mm/slub.c:3663 [inline]
+  slab_alloc_node mm/slub.c:3835 [inline]
+  __do_kmalloc_node mm/slub.c:3965 [inline]
+  __kmalloc_node+0x2db/0x4e0 mm/slub.c:3973
+  kmalloc_node include/linux/slab.h:648 [inline]
+  __vmalloc_area_node mm/vmalloc.c:3197 [inline]
+  __vmalloc_node_range+0x5f9/0x14a0 mm/vmalloc.c:3392
+  __vmalloc_node mm/vmalloc.c:3457 [inline]
+  vzalloc+0x79/0x90 mm/vmalloc.c:3530
+  bpf_check+0x260/0x19010 kernel/bpf/verifier.c:21162
+  bpf_prog_load+0x1667/0x20f0 kernel/bpf/syscall.c:2895
+  __sys_bpf+0x4ee/0x810 kernel/bpf/syscall.c:5631
+  __do_sys_bpf kernel/bpf/syscall.c:5738 [inline]
+  __se_sys_bpf kernel/bpf/syscall.c:5736 [inline]
+  __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736
+ do_syscall_64+0xfb/0x240
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+page last free pid 6650 tgid 6647 stack trace:
+  reset_page_owner include/linux/page_owner.h:24 [inline]
+  free_pages_prepare mm/page_alloc.c:1140 [inline]
+  free_unref_page_prepare+0x95d/0xa80 mm/page_alloc.c:2346
+  free_unref_page_list+0x5a3/0x850 mm/page_alloc.c:2532
+  release_pages+0x2117/0x2400 mm/swap.c:1042
+  tlb_batch_pages_flush mm/mmu_gather.c:98 [inline]
+  tlb_flush_mmu_free mm/mmu_gather.c:293 [inline]
+  tlb_flush_mmu+0x34d/0x4e0 mm/mmu_gather.c:300
+  tlb_finish_mmu+0xd4/0x200 mm/mmu_gather.c:392
+  exit_mmap+0x4b6/0xd40 mm/mmap.c:3300
+  __mmput+0x115/0x3c0 kernel/fork.c:1345
+  exit_mm+0x220/0x310 kernel/exit.c:569
+  do_exit+0x99e/0x27e0 kernel/exit.c:865
+  do_group_exit+0x207/0x2c0 kernel/exit.c:1027
+  get_signal+0x176e/0x1850 kernel/signal.c:2907
+  arch_do_signal_or_restart+0x96/0x860 arch/x86/kernel/signal.c:310
+  exit_to_user_mode_loop kernel/entry/common.c:105 [inline]
+  exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline]
+  __syscall_exit_to_user_mode_work kernel/entry/common.c:201 [inline]
+  syscall_exit_to_user_mode+0xc9/0x360 kernel/entry/common.c:212
+  do_syscall_64+0x10a/0x240 arch/x86/entry/common.c:89
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+Memory state around the buggy address:
+ ffff888028c6cc80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc
+ ffff888028c6cd00: fa fc fc fc fa fc fc fc 00 fc fc fc 06 fc fc fc
+>ffff888028c6cd80: fa fc fc fc fa fc fc fc fa fc fc fc 02 fc fc fc
+                                                       ^
+ ffff888028c6ce00: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc
+ ffff888028c6ce80: fa fc fc fc fa fc fc fc fa fc fc fc fa fc fc fc
+
+Fixes: 423f38329d26 ("xsk: add umem fill queue support and mmap")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: "Björn Töpel" <bjorn@kernel.org>
+Cc: Magnus Karlsson <magnus.karlsson@intel.com>
+Cc: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/r/20240404202738.3634547-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xdp/xsk.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
+index 2c3ba42bfcdcb..93c802cfb9c6a 100644
+--- a/net/xdp/xsk.c
++++ b/net/xdp/xsk.c
+@@ -1332,6 +1332,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
+               struct xsk_queue **q;
+               int entries;
++              if (optlen < sizeof(entries))
++                      return -EINVAL;
+               if (copy_from_sockptr(&entries, optval, sizeof(entries)))
+                       return -EFAULT;
+-- 
+2.43.0
+