git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
authorSasha Levin <sashal@kernel.org>
Sat, 16 Dec 2023 03:36:31 +0000 (22:36 -0500)
committerSasha Levin <sashal@kernel.org>
Sat, 16 Dec 2023 03:36:31 +0000 (22:36 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
51 files changed:
queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch [new file with mode: 0644]
queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch [new file with mode: 0644]
queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch [new file with mode: 0644]
queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch [new file with mode: 0644]
queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch [new file with mode: 0644]
queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch [new file with mode: 0644]
queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch [new file with mode: 0644]
queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch [new file with mode: 0644]
queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch [new file with mode: 0644]
queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch [new file with mode: 0644]
queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch [new file with mode: 0644]
queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch [new file with mode: 0644]
queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch [new file with mode: 0644]
queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch [new file with mode: 0644]
queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch [new file with mode: 0644]
queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch [new file with mode: 0644]
queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch [new file with mode: 0644]
queue-6.6/net-ena-fix-xdp-redirection-error.patch [new file with mode: 0644]
queue-6.6/net-fec-correct-queue-selection.patch [new file with mode: 0644]
queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch [new file with mode: 0644]
queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch [new file with mode: 0644]
queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch [new file with mode: 0644]
queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch [new file with mode: 0644]
queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch [new file with mode: 0644]
queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch [new file with mode: 0644]
queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch [new file with mode: 0644]
queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch [new file with mode: 0644]
queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch [new file with mode: 0644]
queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch [new file with mode: 0644]
queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch [new file with mode: 0644]
queue-6.6/octeontx2-af-update-rss-algorithm-index.patch [new file with mode: 0644]
queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch [new file with mode: 0644]
queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch [new file with mode: 0644]
queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch [new file with mode: 0644]
queue-6.6/qca_spi-fix-reset-behavior.patch [new file with mode: 0644]
queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch [new file with mode: 0644]
queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/sign-file-fix-incorrect-return-values-check.patch [new file with mode: 0644]
queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch [new file with mode: 0644]
queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch [new file with mode: 0644]

diff --git a/queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch b/queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch
new file mode 100644 (file)
index 0000000..c780327
--- /dev/null
@@ -0,0 +1,55 @@
+From 51593a249e733948eedc81bfb6b1ea520bc07e77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 23:10:56 -0500
+Subject: appletalk: Fix Use-After-Free in atalk_ioctl
+
+From: Hyunwoo Kim <v4bel@theori.io>
+
+[ Upstream commit 189ff16722ee36ced4d2a2469d4ab65a8fee4198 ]
+
+Because atalk_ioctl() accesses sk->sk_receive_queue
+without holding a sk->sk_receive_queue.lock, it can
+cause a race with atalk_recvmsg().
+A use-after-free for skb occurs with the following flow.
+```
+atalk_ioctl() -> skb_peek()
+atalk_recvmsg() -> skb_recv_datagram() -> skb_free_datagram()
+```
+Add sk->sk_receive_queue.lock to atalk_ioctl() to fix this issue.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Hyunwoo Kim <v4bel@theori.io>
+Link: https://lore.kernel.org/r/20231213041056.GA519680@v4bel-B760M-AORUS-ELITE-AX
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/appletalk/ddp.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
+index 8978fb6212ffb..b070a89912000 100644
+--- a/net/appletalk/ddp.c
++++ b/net/appletalk/ddp.c
+@@ -1811,15 +1811,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+               break;
+       }
+       case TIOCINQ: {
+-              /*
+-               * These two are safe on a single CPU system as only
+-               * user tasks fiddle here
+-               */
+-              struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
++              struct sk_buff *skb;
+               long amount = 0;
++              spin_lock_irq(&sk->sk_receive_queue.lock);
++              skb = skb_peek(&sk->sk_receive_queue);
+               if (skb)
+                       amount = skb->len - sizeof(struct ddpehdr);
++              spin_unlock_irq(&sk->sk_receive_queue.lock);
+               rc = put_user(amount, (int __user *)argp);
+               break;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch b/queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch
new file mode 100644 (file)
index 0000000..6890052
--- /dev/null
@@ -0,0 +1,55 @@
+From bc174b4d9ab9de29fd1e8328cc52e33a61be4244 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Dec 2023 04:42:10 -0500
+Subject: atm: Fix Use-After-Free in do_vcc_ioctl
+
+From: Hyunwoo Kim <v4bel@theori.io>
+
+[ Upstream commit 24e90b9e34f9e039f56b5f25f6e6eb92cdd8f4b3 ]
+
+Because do_vcc_ioctl() accesses sk->sk_receive_queue
+without holding a sk->sk_receive_queue.lock, it can
+cause a race with vcc_recvmsg().
+A use-after-free for skb occurs with the following flow.
+```
+do_vcc_ioctl() -> skb_peek()
+vcc_recvmsg() -> skb_recv_datagram() -> skb_free_datagram()
+```
+Add sk->sk_receive_queue.lock to do_vcc_ioctl() to fix this issue.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Hyunwoo Kim <v4bel@theori.io>
+Link: https://lore.kernel.org/r/20231209094210.GA403126@v4bel-B760M-AORUS-ELITE-AX
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/atm/ioctl.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
+index 838ebf0cabbfb..f81f8d56f5c0c 100644
+--- a/net/atm/ioctl.c
++++ b/net/atm/ioctl.c
+@@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
+       case SIOCINQ:
+       {
+               struct sk_buff *skb;
++              int amount;
+               if (sock->state != SS_CONNECTED) {
+                       error = -EINVAL;
+                       goto done;
+               }
++              spin_lock_irq(&sk->sk_receive_queue.lock);
+               skb = skb_peek(&sk->sk_receive_queue);
+-              error = put_user(skb ? skb->len : 0,
+-                               (int __user *)argp) ? -EFAULT : 0;
++              amount = skb ? skb->len : 0;
++              spin_unlock_irq(&sk->sk_receive_queue.lock);
++              error = put_user(amount, (int __user *)argp) ? -EFAULT : 0;
+               goto done;
+       }
+       case ATM_SETSC:
+-- 
+2.43.0
+
diff --git a/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch
new file mode 100644 (file)
index 0000000..330be97
--- /dev/null
@@ -0,0 +1,55 @@
+From 4a6feb340d4952edc01932d371baeb65dfdfdae9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 12:34:37 +0000
+Subject: atm: solos-pci: Fix potential deadlock on &cli_queue_lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit d5dba32b8f6cb39be708b726044ba30dbc088b30 ]
+
+As &card->cli_queue_lock is acquired under softirq context along the
+following call chain from solos_bh(), other acquisition of the same
+lock inside process context should disable at least bh to avoid double
+lock.
+
+<deadlock #1>
+console_show()
+--> spin_lock(&card->cli_queue_lock)
+<interrupt>
+   --> solos_bh()
+   --> spin_lock(&card->cli_queue_lock)
+
+This flaw was found by an experimental static analysis tool I am
+developing for irq-related deadlock.
+
+To prevent the potential deadlock, the patch uses spin_lock_bh()
+on the card->cli_queue_lock under process context code consistently
+to prevent the possible deadlock scenario.
+
+Fixes: 9c54004ea717 ("atm: Driver for Solos PCI ADSL2+ card.")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/atm/solos-pci.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
+index 94fbc3abe60e6..95f768b28a5e6 100644
+--- a/drivers/atm/solos-pci.c
++++ b/drivers/atm/solos-pci.c
+@@ -449,9 +449,9 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr,
+       struct sk_buff *skb;
+       unsigned int len;
+-      spin_lock(&card->cli_queue_lock);
++      spin_lock_bh(&card->cli_queue_lock);
+       skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]);
+-      spin_unlock(&card->cli_queue_lock);
++      spin_unlock_bh(&card->cli_queue_lock);
+       if(skb == NULL)
+               return sprintf(buf, "No data.\n");
+-- 
+2.43.0
+
diff --git a/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch
new file mode 100644 (file)
index 0000000..09996ab
--- /dev/null
@@ -0,0 +1,61 @@
+From 398deac409c3f92bd3728297a28093f164c8aa2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 12:34:53 +0000
+Subject: atm: solos-pci: Fix potential deadlock on &tx_queue_lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit 15319a4e8ee4b098118591c6ccbd17237f841613 ]
+
+As &card->tx_queue_lock is acquired under softirq context along the
+following call chain from solos_bh(), other acquisition of the same
+lock inside process context should disable at least bh to avoid double
+lock.
+
+<deadlock #2>
+pclose()
+--> spin_lock(&card->tx_queue_lock)
+<interrupt>
+   --> solos_bh()
+   --> fpga_tx()
+   --> spin_lock(&card->tx_queue_lock)
+
+This flaw was found by an experimental static analysis tool I am
+developing for irq-related deadlock.
+
+To prevent the potential deadlock, the patch uses spin_lock_bh()
+on &card->tx_queue_lock under process context code consistently to
+prevent the possible deadlock scenario.
+
+Fixes: 213e85d38912 ("solos-pci: clean up pclose() function")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/atm/solos-pci.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
+index 95f768b28a5e6..d3c30a28c410e 100644
+--- a/drivers/atm/solos-pci.c
++++ b/drivers/atm/solos-pci.c
+@@ -956,14 +956,14 @@ static void pclose(struct atm_vcc *vcc)
+       struct pkt_hdr *header;
+       /* Remove any yet-to-be-transmitted packets from the pending queue */
+-      spin_lock(&card->tx_queue_lock);
++      spin_lock_bh(&card->tx_queue_lock);
+       skb_queue_walk_safe(&card->tx_queue[port], skb, tmpskb) {
+               if (SKB_CB(skb)->vcc == vcc) {
+                       skb_unlink(skb, &card->tx_queue[port]);
+                       solos_pop(vcc, skb);
+               }
+       }
+-      spin_unlock(&card->tx_queue_lock);
++      spin_unlock_bh(&card->tx_queue_lock);
+       skb = alloc_skb(sizeof(*header), GFP_KERNEL);
+       if (!skb) {
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch b/queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch
new file mode 100644 (file)
index 0000000..5aab621
--- /dev/null
@@ -0,0 +1,46 @@
+From 841064165fa0acf401485f07cddea8f33877d0f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 16:16:55 -0800
+Subject: bnxt_en: Clear resource reservation during resume
+
+From: Somnath Kotur <somnath.kotur@broadcom.com>
+
+[ Upstream commit 9ef7c58f5abe41e6d91f37f28fe2d851ffedd92a ]
+
+We are issuing HWRM_FUNC_RESET cmd to reset the device including
+all reserved resources, but not clearing the reservations
+within the driver struct. As a result, when the driver re-initializes
+as part of resume, it believes that there is no need to do any
+resource reservation and goes ahead and tries to allocate rings
+which will eventually fail beyond a certain number pre-reserved by
+the firmware.
+
+Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.")
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231208001658.14230-2-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 7551aa8068f8f..4d2296f201adb 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -13897,6 +13897,8 @@ static int bnxt_resume(struct device *device)
+       if (rc)
+               goto resume_exit;
++      bnxt_clear_reservations(bp, true);
++
+       if (bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false)) {
+               rc = -ENODEV;
+               goto resume_exit;
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch b/queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch
new file mode 100644 (file)
index 0000000..a28dc85
--- /dev/null
@@ -0,0 +1,107 @@
+From 18bc07b865891ec6f3bd19753095f6b3ce6997b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 16:16:58 -0800
+Subject: bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit c13e268c0768659cdaae4bfe2fb24860bcc8ddb4 ]
+
+When the chip is configured to timestamp all receive packets, the
+timestamp in the RX completion is only valid if the metadata
+present flag is not set for packets received on the wire.  In
+addition, internal loopback packets will never have a valid timestamp
+and the timestamp field will always be zero.  We must exclude
+any 0 value in the timestamp field because there is no way to
+determine if it is a loopback packet or not.
+
+Add a new function bnxt_rx_ts_valid() to check for all timestamp
+valid conditions.
+
+Fixes: 66ed81dcedc6 ("bnxt_en: Enable packet timestamping for all RX packets")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231208001658.14230-5-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 +++++++++++++++++---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h |  8 +++++++-
+ 2 files changed, 24 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 4ce34a39bb5ee..f811d59fd71fd 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1760,6 +1760,21 @@ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
+       napi_gro_receive(&bnapi->napi, skb);
+ }
++static bool bnxt_rx_ts_valid(struct bnxt *bp, u32 flags,
++                           struct rx_cmp_ext *rxcmp1, u32 *cmpl_ts)
++{
++      u32 ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp);
++
++      if (BNXT_PTP_RX_TS_VALID(flags))
++              goto ts_valid;
++      if (!bp->ptp_all_rx_tstamp || !ts || !BNXT_ALL_RX_TS_VALID(flags))
++              return false;
++
++ts_valid:
++      *cmpl_ts = ts;
++      return true;
++}
++
+ /* returns the following:
+  * 1       - 1 packet successfully received
+  * 0       - successful TPA_START, packet not completed yet
+@@ -1785,6 +1800,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+       struct sk_buff *skb;
+       struct xdp_buff xdp;
+       u32 flags, misc;
++      u32 cmpl_ts;
+       void *data;
+       int rc = 0;
+@@ -2007,10 +2023,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+               }
+       }
+-      if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) ==
+-                   RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) {
++      if (bnxt_rx_ts_valid(bp, flags, rxcmp1, &cmpl_ts)) {
+               if (bp->flags & BNXT_FLAG_CHIP_P5) {
+-                      u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp);
+                       u64 ns, ts;
+                       if (!bnxt_get_rx_ts_p5(bp, &ts, cmpl_ts)) {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index ea0f47eceea7c..0116f67593e3a 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -161,7 +161,7 @@ struct rx_cmp {
+       #define RX_CMP_FLAGS_ERROR                              (1 << 6)
+       #define RX_CMP_FLAGS_PLACEMENT                          (7 << 7)
+       #define RX_CMP_FLAGS_RSS_VALID                          (1 << 10)
+-      #define RX_CMP_FLAGS_UNUSED                             (1 << 11)
++      #define RX_CMP_FLAGS_PKT_METADATA_PRESENT               (1 << 11)
+        #define RX_CMP_FLAGS_ITYPES_SHIFT                       12
+        #define RX_CMP_FLAGS_ITYPES_MASK                        0xf000
+        #define RX_CMP_FLAGS_ITYPE_UNKNOWN                      (0 << 12)
+@@ -188,6 +188,12 @@ struct rx_cmp {
+       __le32 rx_cmp_rss_hash;
+ };
++#define BNXT_PTP_RX_TS_VALID(flags)                           \
++      (((flags) & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS)
++
++#define BNXT_ALL_RX_TS_VALID(flags)                           \
++      !((flags) & RX_CMP_FLAGS_PKT_METADATA_PRESENT)
++
+ #define RX_CMP_HASH_VALID(rxcmp)                              \
+       ((rxcmp)->rx_cmp_len_flags_type & cpu_to_le32(RX_CMP_FLAGS_RSS_VALID))
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch b/queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch
new file mode 100644 (file)
index 0000000..d2dc9a6
--- /dev/null
@@ -0,0 +1,50 @@
+From 61ad5d6d9e8bdc87c084bcf4b46e5ec5085551bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 16:16:56 -0800
+Subject: bnxt_en: Fix skb recycling logic in bnxt_deliver_skb()
+
+From: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+
+[ Upstream commit aded5d1feb08e48d544845d3594d70c4d5fe6e54 ]
+
+Receive SKBs can go through the VF-rep path or the normal path.
+skb_mark_for_recycle() is only called for the normal path.  Fix it
+to do it for both paths to fix possible stalled page pool shutdown
+errors.
+
+Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP")
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
+Signed-off-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231208001658.14230-3-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 4d2296f201adb..9f52b943fedec 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -1749,13 +1749,14 @@ static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
+                            struct sk_buff *skb)
+ {
++      skb_mark_for_recycle(skb);
++
+       if (skb->dev != bp->dev) {
+               /* this packet belongs to a vf-rep */
+               bnxt_vf_rep_rx(bp, skb);
+               return;
+       }
+       skb_record_rx_queue(skb, bnapi->index);
+-      skb_mark_for_recycle(skb);
+       napi_gro_receive(&bnapi->napi, skb);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch b/queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch
new file mode 100644 (file)
index 0000000..876b1b7
--- /dev/null
@@ -0,0 +1,177 @@
+From 7f21997937850c004bef110e7374919546fb1245 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 16:16:57 -0800
+Subject: bnxt_en: Fix wrong return value check in bnxt_close_nic()
+
+From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+
+[ Upstream commit bd6781c18cb5b5e5d8c5873fa9a51668e89ec76e ]
+
+The wait_event_interruptible_timeout() function returns 0
+if the timeout elapsed, -ERESTARTSYS if it was interrupted
+by a signal, and the remaining jiffies otherwise if the
+condition evaluated to true before the timeout elapsed.
+
+Driver should have checked for zero return value instead of
+a positive value.
+
+MChan: Print a warning for -ERESTARTSYS.  The close operation
+will proceed anyway when wait_event_interruptible_timeout()
+returns for any reason.  Since we do the close no matter what,
+we should not return this error code to the caller.  Change
+bnxt_close_nic() to a void function and remove all error
+handling from some of the callers.
+
+Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.")
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20231208001658.14230-4-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 13 +++++++------
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  2 +-
+ .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 11 ++---------
+ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++---------------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c |  5 ++---
+ 5 files changed, 16 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 9f52b943fedec..4ce34a39bb5ee 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -10704,10 +10704,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init,
+       bnxt_free_mem(bp, irq_re_init);
+ }
+-int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
++void bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
+ {
+-      int rc = 0;
+-
+       if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+               /* If we get here, it means firmware reset is in progress
+                * while we are trying to close.  We can safely proceed with
+@@ -10722,15 +10720,18 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
+ #ifdef CONFIG_BNXT_SRIOV
+       if (bp->sriov_cfg) {
++              int rc;
++
+               rc = wait_event_interruptible_timeout(bp->sriov_cfg_wait,
+                                                     !bp->sriov_cfg,
+                                                     BNXT_SRIOV_CFG_WAIT_TMO);
+-              if (rc)
+-                      netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n");
++              if (!rc)
++                      netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete, proceeding to close!\n");
++              else if (rc < 0)
++                      netdev_warn(bp->dev, "SRIOV config operation interrupted, proceeding to close!\n");
+       }
+ #endif
+       __bnxt_close_nic(bp, irq_re_init, link_re_init);
+-      return rc;
+ }
+ static int bnxt_close(struct net_device *dev)
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index 84cbcfa61bc12..ea0f47eceea7c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -2362,7 +2362,7 @@ int bnxt_open_nic(struct bnxt *, bool, bool);
+ int bnxt_half_open_nic(struct bnxt *bp);
+ void bnxt_half_close_nic(struct bnxt *bp);
+ void bnxt_reenable_sriov(struct bnxt *bp);
+-int bnxt_close_nic(struct bnxt *, bool, bool);
++void bnxt_close_nic(struct bnxt *, bool, bool);
+ void bnxt_get_ring_err_stats(struct bnxt *bp,
+                            struct bnxt_total_ring_err_stats *stats);
+ int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+index 8b3e7697390f7..9d39f194b260f 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+@@ -478,15 +478,8 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
+                       return -ENODEV;
+               }
+               bnxt_ulp_stop(bp);
+-              if (netif_running(bp->dev)) {
+-                      rc = bnxt_close_nic(bp, true, true);
+-                      if (rc) {
+-                              NL_SET_ERR_MSG_MOD(extack, "Failed to close");
+-                              dev_close(bp->dev);
+-                              rtnl_unlock();
+-                              break;
+-                      }
+-              }
++              if (netif_running(bp->dev))
++                      bnxt_close_nic(bp, true, true);
+               bnxt_vf_reps_free(bp);
+               rc = bnxt_hwrm_func_drv_unrgtr(bp);
+               if (rc) {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+index 547247d98eba2..3c36dd8051485 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -164,9 +164,8 @@ static int bnxt_set_coalesce(struct net_device *dev,
+ reset_coalesce:
+       if (test_bit(BNXT_STATE_OPEN, &bp->state)) {
+               if (update_stats) {
+-                      rc = bnxt_close_nic(bp, true, false);
+-                      if (!rc)
+-                              rc = bnxt_open_nic(bp, true, false);
++                      bnxt_close_nic(bp, true, false);
++                      rc = bnxt_open_nic(bp, true, false);
+               } else {
+                       rc = bnxt_hwrm_set_coal(bp);
+               }
+@@ -955,12 +954,7 @@ static int bnxt_set_channels(struct net_device *dev,
+                        * before PF unload
+                        */
+               }
+-              rc = bnxt_close_nic(bp, true, false);
+-              if (rc) {
+-                      netdev_err(bp->dev, "Set channel failure rc :%x\n",
+-                                 rc);
+-                      return rc;
+-              }
++              bnxt_close_nic(bp, true, false);
+       }
+       if (sh) {
+@@ -3737,12 +3731,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
+               bnxt_run_fw_tests(bp, test_mask, &test_results);
+       } else {
+               bnxt_ulp_stop(bp);
+-              rc = bnxt_close_nic(bp, true, false);
+-              if (rc) {
+-                      etest->flags |= ETH_TEST_FL_FAILED;
+-                      bnxt_ulp_start(bp, rc);
+-                      return;
+-              }
++              bnxt_close_nic(bp, true, false);
+               bnxt_run_fw_tests(bp, test_mask, &test_results);
+               buf[BNXT_MACLPBK_TEST_IDX] = 1;
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+index f3886710e7787..6e3da3362bd61 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+@@ -521,9 +521,8 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
+       if (netif_running(bp->dev)) {
+               if (ptp->rx_filter == HWTSTAMP_FILTER_ALL) {
+-                      rc = bnxt_close_nic(bp, false, false);
+-                      if (!rc)
+-                              rc = bnxt_open_nic(bp, false, false);
++                      bnxt_close_nic(bp, false, false);
++                      rc = bnxt_open_nic(bp, false, false);
+               } else {
+                       bnxt_ptp_cfg_tstamp_filters(bp);
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch b/queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch
new file mode 100644 (file)
index 0000000..2305695
--- /dev/null
@@ -0,0 +1,85 @@
+From c30b8088f48735a81992234bda4bb49d7ef95d2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 18:43:26 +0200
+Subject: dpaa2-switch: do not ask for MDB, VLAN and FDB replay
+
+From: Ioana Ciornei <ioana.ciornei@nxp.com>
+
+[ Upstream commit f24a49a375f65e8e75ee1b19d806f46dbaae57fd ]
+
+Starting with commit 4e51bf44a03a ("net: bridge: move the switchdev
+object replay helpers to "push" mode") the switchdev_bridge_port_offload()
+helper was extended with the intention to provide switchdev drivers easy
+access to object addition and deletion replays. This works by calling
+the replay helpers with non-NULL notifier blocks.
+
+In the same commit, the dpaa2-switch driver was updated so that it
+passes valid notifier blocks to the helper. At that moment, no
+regression was identified through testing.
+
+In the meantime, the blamed commit changed the behavior in terms of
+which ports get hit by the replay. Before this commit, only the initial
+port which identified itself as offloaded through
+switchdev_bridge_port_offload() got a replay of all port objects and
+FDBs. After this, the newly joining port will trigger a replay of
+objects on all bridge ports and on the bridge itself.
+
+This behavior leads to errors in dpaa2_switch_port_vlans_add() when a
+VLAN gets installed on the same interface multiple times.
+
+The intended mechanism to address this is to pass a non-NULL ctx to the
+switchdev_bridge_port_offload() helper and then check it against the
+port's private structure. But since the driver does not have any use for
+the replayed port objects and FDBs until it gains support for LAG
+offload, it's better to fix the issue by reverting the dpaa2-switch
+driver to not ask for replay. The pointers will be added back when we
+are prepared to ignore replays on unrelated ports.
+
+Fixes: b28d580e2939 ("net: bridge: switchdev: replay all VLAN groups")
+Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
+Link: https://lore.kernel.org/r/20231212164326.2753457-3-ioana.ciornei@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+index 97d3151076d53..e01a246124ac6 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+@@ -1998,9 +1998,6 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
+       return notifier_from_errno(err);
+ }
+-static struct notifier_block dpaa2_switch_port_switchdev_nb;
+-static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb;
+-
+ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
+                                        struct net_device *upper_dev,
+                                        struct netlink_ext_ack *extack)
+@@ -2043,9 +2040,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
+               goto err_egress_flood;
+       err = switchdev_bridge_port_offload(netdev, netdev, NULL,
+-                                          &dpaa2_switch_port_switchdev_nb,
+-                                          &dpaa2_switch_port_switchdev_blocking_nb,
+-                                          false, extack);
++                                          NULL, NULL, false, extack);
+       if (err)
+               goto err_switchdev_offload;
+@@ -2079,9 +2074,7 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo
+ static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
+ {
+-      switchdev_bridge_port_unoffload(netdev, NULL,
+-                                      &dpaa2_switch_port_switchdev_nb,
+-                                      &dpaa2_switch_port_switchdev_blocking_nb);
++      switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL);
+ }
+ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
+-- 
+2.43.0
+
diff --git a/queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch b/queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch
new file mode 100644 (file)
index 0000000..cf3ebf9
--- /dev/null
@@ -0,0 +1,50 @@
+From 1bfd84ec25ff0610435ca28aa1b92d2acafc0cb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 18:43:25 +0200
+Subject: dpaa2-switch: fix size of the dma_unmap
+
+From: Ioana Ciornei <ioana.ciornei@nxp.com>
+
+[ Upstream commit 2aad7d4189a923b24efa8ea6ad09059882b1bfe4 ]
+
+The size of the DMA unmap was wrongly put as a sizeof of a pointer.
+Change the value of the DMA unmap to be the actual macro used for the
+allocation and the DMA map.
+
+Fixes: 1110318d83e8 ("dpaa2-switch: add tc flower hardware offload on ingress traffic")
+Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
+Link: https://lore.kernel.org/r/20231212164326.2753457-2-ioana.ciornei@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+index 4798fb7fe35d1..b6a534a3e0b12 100644
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+@@ -139,7 +139,8 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
+       err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                filter_block->acl_id, acl_entry_cfg);
+-      dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
++      dma_unmap_single(dev, acl_entry_cfg->key_iova,
++                       DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+                        DMA_TO_DEVICE);
+       if (err) {
+               dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+@@ -181,8 +182,8 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
+       err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                   block->acl_id, acl_entry_cfg);
+-      dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
+-                       DMA_TO_DEVICE);
++      dma_unmap_single(dev, acl_entry_cfg->key_iova,
++                       DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE);
+       if (err) {
+               dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+               kfree(cmd_buff);
+-- 
+2.43.0
+
diff --git a/queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch b/queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch
new file mode 100644 (file)
index 0000000..42febe1
--- /dev/null
@@ -0,0 +1,194 @@
+From e6ac4c8fe2b678239cbc8ea4989a66022823516c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Nov 2023 10:35:26 -0500
+Subject: iavf: Fix iavf_shutdown to call iavf_remove instead iavf_close
+
+From: Slawomir Laba <slawomirx.laba@intel.com>
+
+[ Upstream commit 7ae42ef308ed0f6250b36f43e4eeb182ebbe6215 ]
+
+Make the flow for pci shutdown be the same to the pci remove.
+
+iavf_shutdown was implementing an incomplete version
+of iavf_remove. It misses several calls to the kernel like
+iavf_free_misc_irq, iavf_reset_interrupt_capability, iounmap
+that might break the system on reboot or hibernation.
+
+Implement the call of iavf_remove directly in iavf_shutdown to
+close this gap.
+
+Fixes below error messages (dmesg) during shutdown stress tests -
+[685814.900917] ice 0000:88:00.0: MAC 02:d0:5f:82:43:5d does not exist for
+ VF 0
+[685814.900928] ice 0000:88:00.0: MAC 33:33:00:00:00:01 does not exist for
+VF 0
+
+Reproduction:
+
+1. Create one VF interface:
+echo 1 > /sys/class/net/<interface_name>/device/sriov_numvfs
+
+2. Run live dmesg on the host:
+dmesg -wH
+
+3. On SUT, script below steps into vf_namespace_assignment.sh
+
+<#!/bin/sh> // Remove <>. Git removes # line
+if=<VF name> (edit this per VF name)
+loop=0
+
+while true; do
+
+echo test round $loop
+let loop++
+
+ip netns add ns$loop
+ip link set dev $if up
+ip link set dev $if netns ns$loop
+ip netns exec ns$loop ip link set dev $if up
+ip netns exec ns$loop ip link set dev $if netns 1
+ip netns delete ns$loop
+
+done
+
+4. Run the script for at least 1000 iterations on SUT:
+./vf_namespace_assignment.sh
+
+Expected result:
+No errors in dmesg.
+
+Fixes: 129cf89e5856 ("iavf: rename functions and structs to new name")
+Signed-off-by: Slawomir Laba <slawomirx.laba@intel.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Reviewed-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Co-developed-by: Ranganatha Rao <ranganatha.rao@intel.com>
+Signed-off-by: Ranganatha Rao <ranganatha.rao@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 72 ++++++---------------
+ 1 file changed, 21 insertions(+), 51 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index af8eb27a3615c..257865647c865 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -277,27 +277,6 @@ void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem)
+       kfree(mem->va);
+ }
+-/**
+- * iavf_lock_timeout - try to lock mutex but give up after timeout
+- * @lock: mutex that should be locked
+- * @msecs: timeout in msecs
+- *
+- * Returns 0 on success, negative on failure
+- **/
+-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+-{
+-      unsigned int wait, delay = 10;
+-
+-      for (wait = 0; wait < msecs; wait += delay) {
+-              if (mutex_trylock(lock))
+-                      return 0;
+-
+-              msleep(delay);
+-      }
+-
+-      return -1;
+-}
+-
+ /**
+  * iavf_schedule_reset - Set the flags and schedule a reset event
+  * @adapter: board private structure
+@@ -4925,34 +4904,6 @@ int iavf_process_config(struct iavf_adapter *adapter)
+       return 0;
+ }
+-/**
+- * iavf_shutdown - Shutdown the device in preparation for a reboot
+- * @pdev: pci device structure
+- **/
+-static void iavf_shutdown(struct pci_dev *pdev)
+-{
+-      struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
+-      struct net_device *netdev = adapter->netdev;
+-
+-      netif_device_detach(netdev);
+-
+-      if (netif_running(netdev))
+-              iavf_close(netdev);
+-
+-      if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+-              dev_warn(&adapter->pdev->dev, "%s: failed to acquire crit_lock\n", __func__);
+-      /* Prevent the watchdog from running. */
+-      iavf_change_state(adapter, __IAVF_REMOVE);
+-      adapter->aq_required = 0;
+-      mutex_unlock(&adapter->crit_lock);
+-
+-#ifdef CONFIG_PM
+-      pci_save_state(pdev);
+-
+-#endif
+-      pci_disable_device(pdev);
+-}
+-
+ /**
+  * iavf_probe - Device Initialization Routine
+  * @pdev: PCI device information struct
+@@ -5166,17 +5117,22 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
+  **/
+ static void iavf_remove(struct pci_dev *pdev)
+ {
+-      struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
+       struct iavf_fdir_fltr *fdir, *fdirtmp;
+       struct iavf_vlan_filter *vlf, *vlftmp;
+       struct iavf_cloud_filter *cf, *cftmp;
+       struct iavf_adv_rss *rss, *rsstmp;
+       struct iavf_mac_filter *f, *ftmp;
++      struct iavf_adapter *adapter;
+       struct net_device *netdev;
+       struct iavf_hw *hw;
+       int err;
+-      netdev = adapter->netdev;
++      /* Don't proceed with remove if netdev is already freed */
++      netdev = pci_get_drvdata(pdev);
++      if (!netdev)
++              return;
++
++      adapter = iavf_pdev_to_adapter(pdev);
+       hw = &adapter->hw;
+       if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+@@ -5304,11 +5260,25 @@ static void iavf_remove(struct pci_dev *pdev)
+       destroy_workqueue(adapter->wq);
++      pci_set_drvdata(pdev, NULL);
++
+       free_netdev(netdev);
+       pci_disable_device(pdev);
+ }
++/**
++ * iavf_shutdown - Shutdown the device in preparation for a reboot
++ * @pdev: pci device structure
++ **/
++static void iavf_shutdown(struct pci_dev *pdev)
++{
++      iavf_remove(pdev);
++
++      if (system_state == SYSTEM_POWER_OFF)
++              pci_set_power_state(pdev, PCI_D3hot);
++}
++
+ static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume);
+ static struct pci_driver iavf_driver = {
+-- 
+2.43.0
+
diff --git a/queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch b/queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch
new file mode 100644 (file)
index 0000000..5d964e3
--- /dev/null
@@ -0,0 +1,131 @@
+From f44ca0576e3444e64bc36f53018eb3b5c3f03943 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Nov 2023 22:47:16 -0500
+Subject: iavf: Handle ntuple on/off based on new state machines for flow
+ director
+
+From: Piotr Gardocki <piotrx.gardocki@intel.com>
+
+[ Upstream commit 09d23b8918f9ab0f8114f6b94f2faf8bde3fb52a ]
+
+ntuple-filter feature on/off:
+Default is on. If turned off, the filters will be removed from both
+PF and iavf list. The removal is irrespective of current filter state.
+
+Steps to reproduce:
+-------------------
+
+1. Ensure ntuple is on.
+
+ethtool -K enp8s0 ntuple-filters on
+
+2. Create a filter to receive the traffic into non-default rx-queue like 15
+and ensure traffic is flowing into queue into 15.
+Now, turn off ntuple. Traffic should not flow to configured queue 15.
+It should flow to default RX queue.
+
+Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters")
+Signed-off-by: Piotr Gardocki <piotrx.gardocki@intel.com>
+Reviewed-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Signed-off-by: Ranganatha Rao <ranganatha.rao@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 59 +++++++++++++++++++++
+ 1 file changed, 59 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 5158addc0aa96..af8eb27a3615c 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -4409,6 +4409,49 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+       return ret;
+ }
++/**
++ * iavf_disable_fdir - disable Flow Director and clear existing filters
++ * @adapter: board private structure
++ **/
++static void iavf_disable_fdir(struct iavf_adapter *adapter)
++{
++      struct iavf_fdir_fltr *fdir, *fdirtmp;
++      bool del_filters = false;
++
++      adapter->flags &= ~IAVF_FLAG_FDIR_ENABLED;
++
++      /* remove all Flow Director filters */
++      spin_lock_bh(&adapter->fdir_fltr_lock);
++      list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head,
++                               list) {
++              if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST ||
++                  fdir->state == IAVF_FDIR_FLTR_INACTIVE) {
++                      /* Delete filters not registered in PF */
++                      list_del(&fdir->list);
++                      kfree(fdir);
++                      adapter->fdir_active_fltr--;
++              } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING ||
++                         fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST ||
++                         fdir->state == IAVF_FDIR_FLTR_ACTIVE) {
++                      /* Filters registered in PF, schedule their deletion */
++                      fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
++                      del_filters = true;
++              } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
++                      /* Request to delete filter already sent to PF, change
++                       * state to DEL_PENDING to delete filter after PF's
++                       * response, not set as INACTIVE
++                       */
++                      fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
++              }
++      }
++      spin_unlock_bh(&adapter->fdir_fltr_lock);
++
++      if (del_filters) {
++              adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
++              mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
++      }
++}
++
+ #define NETIF_VLAN_OFFLOAD_FEATURES   (NETIF_F_HW_VLAN_CTAG_RX | \
+                                        NETIF_F_HW_VLAN_CTAG_TX | \
+                                        NETIF_F_HW_VLAN_STAG_RX | \
+@@ -4431,6 +4474,13 @@ static int iavf_set_features(struct net_device *netdev,
+               iavf_set_vlan_offload_features(adapter, netdev->features,
+                                              features);
++      if ((netdev->features & NETIF_F_NTUPLE) ^ (features & NETIF_F_NTUPLE)) {
++              if (features & NETIF_F_NTUPLE)
++                      adapter->flags |= IAVF_FLAG_FDIR_ENABLED;
++              else
++                      iavf_disable_fdir(adapter);
++      }
++
+       return 0;
+ }
+@@ -4726,6 +4776,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev,
+ {
+       struct iavf_adapter *adapter = netdev_priv(netdev);
++      if (!FDIR_FLTR_SUPPORT(adapter))
++              features &= ~NETIF_F_NTUPLE;
++
+       return iavf_fix_netdev_vlan_features(adapter, features);
+ }
+@@ -4843,6 +4896,12 @@ int iavf_process_config(struct iavf_adapter *adapter)
+       if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+               netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
++      if (FDIR_FLTR_SUPPORT(adapter)) {
++              netdev->hw_features |= NETIF_F_NTUPLE;
++              netdev->features |= NETIF_F_NTUPLE;
++              adapter->flags |= IAVF_FLAG_FDIR_ENABLED;
++      }
++
+       netdev->priv_flags |= IFF_UNICAST_FLT;
+       /* Do not turn on offloads when they are requested to be turned off.
+-- 
+2.43.0
+
diff --git a/queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch b/queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch
new file mode 100644 (file)
index 0000000..b1f6ba6
--- /dev/null
@@ -0,0 +1,407 @@
+From 0c159fba190bb21cb6fb097192dd70017172d8c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Nov 2023 22:47:15 -0500
+Subject: iavf: Introduce new state machines for flow director
+
+From: Piotr Gardocki <piotrx.gardocki@intel.com>
+
+[ Upstream commit 3a0b5a2929fdeda63fc921c2dbed237059acf732 ]
+
+New states introduced:
+
+ IAVF_FDIR_FLTR_DIS_REQUEST
+ IAVF_FDIR_FLTR_DIS_PENDING
+ IAVF_FDIR_FLTR_INACTIVE
+
+Current FDIR state machines (SM) are not adequate to handle a few
+scenarios in the link DOWN/UP event, reset event and ntuple-feature.
+
+For example, when VF link goes DOWN and comes back UP administratively,
+the expectation is that previously installed filters should also be
+restored. But with current SM, filters are not restored.
+So with new SM, during link DOWN filters are marked as INACTIVE in
+the iavf list but removed from PF. After link UP, SM will transition
+from INACTIVE to ADD_REQUEST to restore the filter.
+
+Similarly, with VF reset, filters will be removed from the PF, but
+marked as INACTIVE in the iavf list. Filters will be restored after
+reset completion.
+
+Steps to reproduce:
+-------------------
+
+1. Create a VF. Here VF is enp8s0.
+
+2. Assign IP addresses to VF and link partner and ping continuously
+from remote. Here remote IP is 1.1.1.1.
+
+3. Check default RX Queue of traffic.
+
+ethtool -S enp8s0 | grep -E "rx-[[:digit:]]+\.packets"
+
+4. Add filter - change default RX Queue (to 15 here)
+
+ethtool -U ens8s0 flow-type ip4 src-ip 1.1.1.1 action 15 loc 5
+
+5. Ensure filter gets added and traffic is received on RX queue 15 now.
+
+Link event testing:
+-------------------
+6. Bring VF link down and up. If traffic flows to configured queue 15,
+test is success, otherwise it is a failure.
+
+Reset event testing:
+--------------------
+7. Reset the VF. If traffic flows to configured queue 15, test is success,
+otherwise it is a failure.
+
+Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters")
+Signed-off-by: Piotr Gardocki <piotrx.gardocki@intel.com>
+Reviewed-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Signed-off-by: Ranganatha Rao <ranganatha.rao@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h        |  1 +
+ .../net/ethernet/intel/iavf/iavf_ethtool.c    | 27 ++++---
+ drivers/net/ethernet/intel/iavf/iavf_fdir.h   | 15 +++-
+ drivers/net/ethernet/intel/iavf/iavf_main.c   | 48 ++++++++++---
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c   | 71 +++++++++++++++++--
+ 5 files changed, 139 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index d8d7b62ceb24e..431d9d62c8c66 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -303,6 +303,7 @@ struct iavf_adapter {
+ #define IAVF_FLAG_QUEUES_DISABLED             BIT(17)
+ #define IAVF_FLAG_SETUP_NETDEV_FEATURES               BIT(18)
+ #define IAVF_FLAG_REINIT_MSIX_NEEDED          BIT(20)
++#define IAVF_FLAG_FDIR_ENABLED                        BIT(21)
+ /* duplicates for common code */
+ #define IAVF_FLAG_DCB_ENABLED                 0
+       /* flags for admin queue service task */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 1b412754aa422..892c6a4f03bb8 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -1063,7 +1063,7 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
+       struct iavf_fdir_fltr *rule = NULL;
+       int ret = 0;
+-      if (!FDIR_FLTR_SUPPORT(adapter))
++      if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+               return -EOPNOTSUPP;
+       spin_lock_bh(&adapter->fdir_fltr_lock);
+@@ -1205,7 +1205,7 @@ iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd,
+       unsigned int cnt = 0;
+       int val = 0;
+-      if (!FDIR_FLTR_SUPPORT(adapter))
++      if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+               return -EOPNOTSUPP;
+       cmd->data = IAVF_MAX_FDIR_FILTERS;
+@@ -1397,7 +1397,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
+       int count = 50;
+       int err;
+-      if (!FDIR_FLTR_SUPPORT(adapter))
++      if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+               return -EOPNOTSUPP;
+       if (fsp->flow_type & FLOW_MAC_EXT)
+@@ -1438,12 +1438,16 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
+       spin_lock_bh(&adapter->fdir_fltr_lock);
+       iavf_fdir_list_add_fltr(adapter, fltr);
+       adapter->fdir_active_fltr++;
+-      fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+-      adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
++      if (adapter->link_up) {
++              fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST;
++              adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
++      } else {
++              fltr->state = IAVF_FDIR_FLTR_INACTIVE;
++      }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
+-      mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+-
++      if (adapter->link_up)
++              mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+ ret:
+       if (err && fltr)
+               kfree(fltr);
+@@ -1465,7 +1469,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
+       struct iavf_fdir_fltr *fltr = NULL;
+       int err = 0;
+-      if (!FDIR_FLTR_SUPPORT(adapter))
++      if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+               return -EOPNOTSUPP;
+       spin_lock_bh(&adapter->fdir_fltr_lock);
+@@ -1474,6 +1478,11 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
+               if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) {
+                       fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+                       adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
++              } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) {
++                      list_del(&fltr->list);
++                      kfree(fltr);
++                      adapter->fdir_active_fltr--;
++                      fltr = NULL;
+               } else {
+                       err = -EBUSY;
+               }
+@@ -1782,7 +1791,7 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+               ret = 0;
+               break;
+       case ETHTOOL_GRXCLSRLCNT:
+-              if (!FDIR_FLTR_SUPPORT(adapter))
++              if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+                       break;
+               spin_lock_bh(&adapter->fdir_fltr_lock);
+               cmd->rule_cnt = adapter->fdir_active_fltr;
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+index 9eb9f73f6adf3..d31bd923ba8cb 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+@@ -6,12 +6,25 @@
+ struct iavf_adapter;
+-/* State of Flow Director filter */
++/* State of Flow Director filter
++ *
++ * *_REQUEST states are used to mark filter to be sent to PF driver to perform
++ * an action (either add or delete filter). *_PENDING states are an indication
++ * that request was sent to PF and the driver is waiting for response.
++ *
++ * Both DELETE and DISABLE states are being used to delete a filter in PF.
++ * The difference is that after a successful response filter in DEL_PENDING
++ * state is being deleted from VF driver as well and filter in DIS_PENDING state
++ * is being changed to INACTIVE state.
++ */
+ enum iavf_fdir_fltr_state_t {
+       IAVF_FDIR_FLTR_ADD_REQUEST,     /* User requests to add filter */
+       IAVF_FDIR_FLTR_ADD_PENDING,     /* Filter pending add by the PF */
+       IAVF_FDIR_FLTR_DEL_REQUEST,     /* User requests to delete filter */
+       IAVF_FDIR_FLTR_DEL_PENDING,     /* Filter pending delete by the PF */
++      IAVF_FDIR_FLTR_DIS_REQUEST,     /* Filter scheduled to be disabled */
++      IAVF_FDIR_FLTR_DIS_PENDING,     /* Filter pending disable by the PF */
++      IAVF_FDIR_FLTR_INACTIVE,        /* Filter inactive on link down */
+       IAVF_FDIR_FLTR_ACTIVE,          /* Filter is active */
+ };
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 68783a7b70962..5158addc0aa96 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1356,18 +1356,20 @@ static void iavf_clear_cloud_filters(struct iavf_adapter *adapter)
+  **/
+ static void iavf_clear_fdir_filters(struct iavf_adapter *adapter)
+ {
+-      struct iavf_fdir_fltr *fdir, *fdirtmp;
++      struct iavf_fdir_fltr *fdir;
+       /* remove all Flow Director filters */
+       spin_lock_bh(&adapter->fdir_fltr_lock);
+-      list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head,
+-                               list) {
++      list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+               if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
+-                      list_del(&fdir->list);
+-                      kfree(fdir);
+-                      adapter->fdir_active_fltr--;
+-              } else {
+-                      fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
++                      /* Cancel a request, keep filter as inactive */
++                      fdir->state = IAVF_FDIR_FLTR_INACTIVE;
++              } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING ||
++                       fdir->state == IAVF_FDIR_FLTR_ACTIVE) {
++                      /* Disable filters which are active or have a pending
++                       * request to PF to be added
++                       */
++                      fdir->state = IAVF_FDIR_FLTR_DIS_REQUEST;
+               }
+       }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
+@@ -4174,6 +4176,33 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+       }
+ }
++/**
++ * iavf_restore_fdir_filters
++ * @adapter: board private structure
++ *
++ * Restore existing FDIR filters when VF netdev comes back up.
++ **/
++static void iavf_restore_fdir_filters(struct iavf_adapter *adapter)
++{
++      struct iavf_fdir_fltr *f;
++
++      spin_lock_bh(&adapter->fdir_fltr_lock);
++      list_for_each_entry(f, &adapter->fdir_list_head, list) {
++              if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST) {
++                      /* Cancel a request, keep filter as active */
++                      f->state = IAVF_FDIR_FLTR_ACTIVE;
++              } else if (f->state == IAVF_FDIR_FLTR_DIS_PENDING ||
++                         f->state == IAVF_FDIR_FLTR_INACTIVE) {
++                      /* Add filters which are inactive or have a pending
++                       * request to PF to be deleted
++                       */
++                      f->state = IAVF_FDIR_FLTR_ADD_REQUEST;
++                      adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
++              }
++      }
++      spin_unlock_bh(&adapter->fdir_fltr_lock);
++}
++
+ /**
+  * iavf_open - Called when a network interface is made active
+  * @netdev: network interface device structure
+@@ -4241,8 +4270,9 @@ static int iavf_open(struct net_device *netdev)
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
+-      /* Restore VLAN filters that were removed with IFF_DOWN */
++      /* Restore filters that were removed with IFF_DOWN */
+       iavf_restore_filters(adapter);
++      iavf_restore_fdir_filters(adapter);
+       iavf_configure(adapter);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 0b97b424e487a..b95a4f903204b 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -1738,8 +1738,8 @@ void iavf_add_fdir_filter(struct iavf_adapter *adapter)
+  **/
+ void iavf_del_fdir_filter(struct iavf_adapter *adapter)
+ {
++      struct virtchnl_fdir_del f = {};
+       struct iavf_fdir_fltr *fdir;
+-      struct virtchnl_fdir_del f;
+       bool process_fltr = false;
+       int len;
+@@ -1756,11 +1756,16 @@ void iavf_del_fdir_filter(struct iavf_adapter *adapter)
+       list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+               if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) {
+                       process_fltr = true;
+-                      memset(&f, 0, len);
+                       f.vsi_id = fdir->vc_add_msg.vsi_id;
+                       f.flow_id = fdir->flow_id;
+                       fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
+                       break;
++              } else if (fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST) {
++                      process_fltr = true;
++                      f.vsi_id = fdir->vc_add_msg.vsi_id;
++                      f.flow_id = fdir->flow_id;
++                      fdir->state = IAVF_FDIR_FLTR_DIS_PENDING;
++                      break;
+               }
+       }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
+@@ -1904,6 +1909,48 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev,
+               netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ }
++/**
++ * iavf_activate_fdir_filters - Reactivate all FDIR filters after a reset
++ * @adapter: private adapter structure
++ *
++ * Called after a reset to re-add all FDIR filters and delete some of them
++ * if they were pending to be deleted.
++ */
++static void iavf_activate_fdir_filters(struct iavf_adapter *adapter)
++{
++      struct iavf_fdir_fltr *f, *ftmp;
++      bool add_filters = false;
++
++      spin_lock_bh(&adapter->fdir_fltr_lock);
++      list_for_each_entry_safe(f, ftmp, &adapter->fdir_list_head, list) {
++              if (f->state == IAVF_FDIR_FLTR_ADD_REQUEST ||
++                  f->state == IAVF_FDIR_FLTR_ADD_PENDING ||
++                  f->state == IAVF_FDIR_FLTR_ACTIVE) {
++                      /* All filters and requests have been removed in PF,
++                       * restore them
++                       */
++                      f->state = IAVF_FDIR_FLTR_ADD_REQUEST;
++                      add_filters = true;
++              } else if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST ||
++                         f->state == IAVF_FDIR_FLTR_DIS_PENDING) {
++                      /* Link down state, leave filters as inactive */
++                      f->state = IAVF_FDIR_FLTR_INACTIVE;
++              } else if (f->state == IAVF_FDIR_FLTR_DEL_REQUEST ||
++                         f->state == IAVF_FDIR_FLTR_DEL_PENDING) {
++                      /* Delete filters that were pending to be deleted, the
++                       * list on PF is already cleared after a reset
++                       */
++                      list_del(&f->list);
++                      kfree(f);
++                      adapter->fdir_active_fltr--;
++              }
++      }
++      spin_unlock_bh(&adapter->fdir_fltr_lock);
++
++      if (add_filters)
++              adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
++}
++
+ /**
+  * iavf_virtchnl_completion
+  * @adapter: adapter structure
+@@ -2081,7 +2128,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+                       spin_lock_bh(&adapter->fdir_fltr_lock);
+                       list_for_each_entry(fdir, &adapter->fdir_list_head,
+                                           list) {
+-                              if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
++                              if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING ||
++                                  fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
+                                       fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+                                       dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n",
+                                                iavf_stat_str(&adapter->hw,
+@@ -2217,6 +2265,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+               spin_unlock_bh(&adapter->mac_vlan_list_lock);
++              iavf_activate_fdir_filters(adapter);
++
+               iavf_parse_vf_resource_msg(adapter);
+               /* negotiated VIRTCHNL_VF_OFFLOAD_VLAN_V2, so wait for the
+@@ -2406,7 +2456,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+               list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head,
+                                        list) {
+                       if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+-                              if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
++                              if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS ||
++                                  del_fltr->status ==
++                                  VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) {
+                                       dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n",
+                                                fdir->loc);
+                                       list_del(&fdir->list);
+@@ -2418,6 +2470,17 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+                                                del_fltr->status);
+                                       iavf_print_fdir_fltr(adapter, fdir);
+                               }
++                      } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
++                              if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS ||
++                                  del_fltr->status ==
++                                  VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) {
++                                      fdir->state = IAVF_FDIR_FLTR_INACTIVE;
++                              } else {
++                                      fdir->state = IAVF_FDIR_FLTR_ACTIVE;
++                                      dev_info(&adapter->pdev->dev, "Failed to disable Flow Director filter with status: %d\n",
++                                               del_fltr->status);
++                                      iavf_print_fdir_fltr(adapter, fdir);
++                              }
+                       }
+               }
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch b/queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch
new file mode 100644 (file)
index 0000000..17cb646
--- /dev/null
@@ -0,0 +1,56 @@
+From 4f3dae3bb5f56d9b4b5633e1fcfc8a15ab421e92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Dec 2023 10:40:44 +0100
+Subject: net: atlantic: fix double free in ring reinit logic
+
+From: Igor Russkikh <irusskikh@marvell.com>
+
+[ Upstream commit 7bb26ea74aa86fdf894b7dbd8c5712c5b4187da7 ]
+
+Driver has a logic leak in ring data allocation/free,
+where double free may happen in aq_ring_free if system is under
+stress and driver init/deinit is happening.
+
+The probability is higher to get this during suspend/resume cycle.
+
+Verification was done simulating same conditions with
+
+    stress -m 2000 --vm-bytes 20M --vm-hang 10 --backoff 1000
+    while true; do sudo ifconfig enp1s0 down; sudo ifconfig enp1s0 up; done
+
+Fixed by explicitly clearing pointers to NULL on deallocation
+
+Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code")
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Closes: https://lore.kernel.org/netdev/CAHk-=wiZZi7FcvqVSUirHBjx0bBUZ4dFrMDVLc3+3HCrtq0rBA@mail.gmail.com/
+Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
+Link: https://lore.kernel.org/r/20231213094044.22988-1-irusskikh@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+index 694daeaf3e615..e1885c1eb100a 100644
+--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+@@ -938,11 +938,14 @@ void aq_ring_free(struct aq_ring_s *self)
+               return;
+       kfree(self->buff_ring);
++      self->buff_ring = NULL;
+-      if (self->dx_ring)
++      if (self->dx_ring) {
+               dma_free_coherent(aq_nic_get_dev(self->aq_nic),
+                                 self->size * self->dx_size, self->dx_ring,
+                                 self->dx_ring_pa);
++              self->dx_ring = NULL;
++      }
+ }
+ unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch b/queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch
new file mode 100644 (file)
index 0000000..d442666
--- /dev/null
@@ -0,0 +1,71 @@
+From bd11d3397b5d9cb4c4b875b63888b4694131af4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 06:27:58 +0000
+Subject: net: ena: Destroy correct number of xdp queues upon failure
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 41db6f99b5489a0d2ef26afe816ef0c6118d1d47 ]
+
+The ena_setup_and_create_all_xdp_queues() function freed all the
+resources upon failure, after creating only xdp_num_queues queues,
+instead of freeing just the created ones.
+
+In this patch, the only resources that are freed, are the ones
+allocated right before the failure occurs.
+
+Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
+Signed-off-by: Shahar Itzko <itzko@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20231211062801.27891-2-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index f955bde10cf90..098025d292473 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -74,6 +74,8 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+                             struct ena_tx_buffer *tx_info);
+ static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+                                           int first_index, int count);
++static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
++                                                int first_index, int count);
+ /* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
+ static void ena_increase_stat(u64 *statp, u64 cnt,
+@@ -457,23 +459,22 @@ static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+ static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+ {
++      u32 xdp_first_ring = adapter->xdp_first_ring;
++      u32 xdp_num_queues = adapter->xdp_num_queues;
+       int rc = 0;
+-      rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
+-                                           adapter->xdp_num_queues);
++      rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+       if (rc)
+               goto setup_err;
+-      rc = ena_create_io_tx_queues_in_range(adapter,
+-                                            adapter->xdp_first_ring,
+-                                            adapter->xdp_num_queues);
++      rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
+       if (rc)
+               goto create_err;
+       return 0;
+ create_err:
+-      ena_free_all_io_tx_resources(adapter);
++      ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+ setup_err:
+       return rc;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch b/queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch
new file mode 100644 (file)
index 0000000..b4595fe
--- /dev/null
@@ -0,0 +1,182 @@
+From 3b700c0d483a0dc614921470db3818deb43f1fca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 06:28:00 +0000
+Subject: net: ena: Fix DMA syncing in XDP path when SWIOTLB is on
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit d760117060cf2e90b5c59c5492cab179a4dbce01 ]
+
+This patch fixes two issues:
+
+Issue 1
+-------
+Description
+```````````
+Current code does not call dma_sync_single_for_cpu() to sync data from
+the device side memory to the CPU side memory before the XDP code path
+uses the CPU side data.
+This causes the XDP code path to read the unset garbage data in the CPU
+side memory, resulting in incorrect handling of the packet by XDP.
+
+Solution
+````````
+1. Add a call to dma_sync_single_for_cpu() before the XDP code starts to
+   use the data in the CPU side memory.
+2. The XDP code verdict can be XDP_PASS, in which case there is a
+   fallback to the non-XDP code, which also calls
+   dma_sync_single_for_cpu().
+   To avoid calling dma_sync_single_for_cpu() twice:
+2.1. Put the dma_sync_single_for_cpu() in the code in such a place where
+     it happens before XDP and non-XDP code.
+2.2. Remove the calls to dma_sync_single_for_cpu() in the non-XDP code
+     for the first buffer only (rx_copybreak and non-rx_copybreak
+     cases), since the new call that was added covers these cases.
+     The call to dma_sync_single_for_cpu() for the second buffer and on
+     stays because only the first buffer is handled by the newly added
+     dma_sync_single_for_cpu(). And there is no need for special
+     handling of the second buffer and on for the XDP path since
+     currently the driver supports only single buffer packets.
+
+Issue 2
+-------
+Description
+```````````
+In case the XDP code forwarded the packet (ENA_XDP_FORWARDED),
+ena_unmap_rx_buff_attrs() is called with attrs set to 0.
+This means that before unmapping the buffer, the internal function
+dma_unmap_page_attrs() will also call dma_sync_single_for_cpu() on
+the whole buffer (not only on the data part of it).
+This sync is both wasteful (since a sync was already explicitly
+called before) and also causes a bug, which will be explained
+using the below diagram.
+
+The following diagram shows the flow of events causing the bug.
+The order of events is (1)-(4) as shown in the diagram.
+
+CPU side memory area
+
+     (3)convert_to_xdp_frame() initializes the
+        headroom with xdpf metadata
+                      ||
+                      \/
+          ___________________________________
+         |                                   |
+ 0       |                                   V                       4K
+ ---------------------------------------------------------------------
+ | xdpf->data      | other xdpf       |   < data >   | tailroom ||...|
+ |                 | fields           |              | GARBAGE  ||   |
+ ---------------------------------------------------------------------
+
+                   /\                        /\
+                   ||                        ||
+   (4)ena_unmap_rx_buff_attrs() calls     (2)dma_sync_single_for_cpu()
+      dma_sync_single_for_cpu() on the       copies data from device
+      whole buffer page, overwriting         side to CPU side memory
+      the xdpf->data with GARBAGE.           ||
+ 0                                                                   4K
+ ---------------------------------------------------------------------
+ | headroom                           |   < data >   | tailroom ||...|
+ | GARBAGE                            |              | GARBAGE  ||   |
+ ---------------------------------------------------------------------
+
+Device side memory area                      /\
+                                             ||
+                               (1) device writes RX packet data
+
+After the call to ena_unmap_rx_buff_attrs() in (4), the xdpf->data
+becomes corrupted, and so when it is later accessed in
+ena_clean_xdp_irq()->xdp_return_frame(), it causes a page fault,
+crashing the kernel.
+
+Solution
+````````
+Explicitly tell ena_unmap_rx_buff_attrs() not to call
+dma_sync_single_for_cpu() by passing it the ENA_DMA_ATTR_SKIP_CPU_SYNC
+flag.
+
+Fixes: f7d625adeb7b ("net: ena: Add dynamic recycling mechanism for rx buffers")
+Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20231211062801.27891-4-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 23 ++++++++------------
+ 1 file changed, 9 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index b638e1d3d151a..14e41eb57731b 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1493,11 +1493,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+               if (unlikely(!skb))
+                       return NULL;
+-              /* sync this buffer for CPU use */
+-              dma_sync_single_for_cpu(rx_ring->dev,
+-                                      dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
+-                                      len,
+-                                      DMA_FROM_DEVICE);
+               skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
+               dma_sync_single_for_device(rx_ring->dev,
+                                          dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
+@@ -1516,17 +1511,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+       buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
+-      pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
+-
+       /* If XDP isn't loaded try to reuse part of the RX buffer */
+       reuse_rx_buf_page = !is_xdp_loaded &&
+                           ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
+-      dma_sync_single_for_cpu(rx_ring->dev,
+-                              pre_reuse_paddr + pkt_offset,
+-                              len,
+-                              DMA_FROM_DEVICE);
+-
+       if (!reuse_rx_buf_page)
+               ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
+@@ -1723,6 +1711,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+       int xdp_flags = 0;
+       int total_len = 0;
+       int xdp_verdict;
++      u8 pkt_offset;
+       int rc = 0;
+       int i;
+@@ -1749,13 +1738,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+               /* First descriptor might have an offset set by the device */
+               rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+-              rx_info->buf_offset += ena_rx_ctx.pkt_offset;
++              pkt_offset = ena_rx_ctx.pkt_offset;
++              rx_info->buf_offset += pkt_offset;
+               netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+                         "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+                         rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+                         ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
++              dma_sync_single_for_cpu(rx_ring->dev,
++                                      dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
++                                      rx_ring->ena_bufs[0].len,
++                                      DMA_FROM_DEVICE);
++
+               if (ena_xdp_present_ring(rx_ring))
+                       xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
+@@ -1781,7 +1776,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                               if (xdp_verdict & ENA_XDP_FORWARDED) {
+                                       ena_unmap_rx_buff_attrs(rx_ring,
+                                                               &rx_ring->rx_buffer_info[req_id],
+-                                                              0);
++                                                              DMA_ATTR_SKIP_CPU_SYNC);
+                                       rx_ring->rx_buffer_info[req_id].page = NULL;
+                               }
+                       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch b/queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch
new file mode 100644 (file)
index 0000000..28d60dd
--- /dev/null
@@ -0,0 +1,77 @@
+From 730de72d013cdd9bdb59d5ebf56bc6d6e2b3809c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 06:27:59 +0000
+Subject: net: ena: Fix xdp drops handling due to multibuf packets
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 505b1a88d311ff6f8c44a34f94e3be21745cce6f ]
+
+Current xdp code drops packets larger than ENA_XDP_MAX_MTU.
+This is an incorrect condition since the problem is not the
+size of the packet, rather the number of buffers it contains.
+
+This commit:
+
+1. Identifies and drops XDP multi-buffer packets at the
+   beginning of the function.
+2. Increases the xdp drop statistic when this drop occurs.
+3. Adds a one-time print that such drops are happening to
+   give better indication to the user.
+
+Fixes: 838c93dc5449 ("net: ena: implement XDP drop support")
+Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20231211062801.27891-3-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+index 098025d292473..b638e1d3d151a 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1672,20 +1672,23 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring,
+       }
+ }
+-static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
++static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs)
+ {
+       struct ena_rx_buffer *rx_info;
+       int ret;
++      /* XDP multi-buffer packets not supported */
++      if (unlikely(num_descs > 1)) {
++              netdev_err_once(rx_ring->adapter->netdev,
++                              "xdp: dropped unsupported multi-buffer packets\n");
++              ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp);
++              return ENA_XDP_DROP;
++      }
++
+       rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+       xdp_prepare_buff(xdp, page_address(rx_info->page),
+                        rx_info->buf_offset,
+                        rx_ring->ena_bufs[0].len, false);
+-      /* If for some reason we received a bigger packet than
+-       * we expect, then we simply drop it
+-       */
+-      if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
+-              return ENA_XDP_DROP;
+       ret = ena_xdp_execute(rx_ring, xdp);
+@@ -1754,7 +1757,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+                         ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+               if (ena_xdp_present_ring(rx_ring))
+-                      xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
++                      xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
+               /* allocate skb and fill it */
+               if (xdp_verdict == ENA_XDP_PASS)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ena-fix-xdp-redirection-error.patch b/queue-6.6/net-ena-fix-xdp-redirection-error.patch
new file mode 100644 (file)
index 0000000..354ac06
--- /dev/null
@@ -0,0 +1,43 @@
+From 0ea404ba02c610ccbb7f0ee1b9167a5bad3e7a9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 06:28:01 +0000
+Subject: net: ena: Fix XDP redirection error
+
+From: David Arinzon <darinzon@amazon.com>
+
+[ Upstream commit 4ab138ca0a340e6d6e7a6a9bd5004bd8f83127ca ]
+
+When sending TX packets, the meta descriptor can be all zeroes
+as no meta information is required (as in XDP).
+
+This patch removes the validity check, as when
+`disable_meta_caching` is enabled, such TX packets will be
+dropped otherwise.
+
+Fixes: 0e3a3f6dacf0 ("net: ena: support new LLQ acceleration mode")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Signed-off-by: David Arinzon <darinzon@amazon.com>
+Link: https://lore.kernel.org/r/20231211062801.27891-5-darinzon@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_eth_com.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+index 3d6f0a466a9ed..f9f886289b970 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+@@ -328,9 +328,6 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+        * compare it to the stored version, just create the meta
+        */
+       if (io_sq->disable_meta_caching) {
+-              if (unlikely(!ena_tx_ctx->meta_valid))
+-                      return -EINVAL;
+-
+               *have_meta = true;
+               return ena_com_create_meta(io_sq, ena_meta);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-fec-correct-queue-selection.patch b/queue-6.6/net-fec-correct-queue-selection.patch
new file mode 100644 (file)
index 0000000..9cb226d
--- /dev/null
@@ -0,0 +1,81 @@
+From 582e358abf71b6163037847c9ab5e765b85d8971 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 16:38:01 +0800
+Subject: net: fec: correct queue selection
+
+From: Radu Bulie <radu-andrei.bulie@nxp.com>
+
+[ Upstream commit 9fc95fe95c3e2a63ced8eeca4b256518ab204b63 ]
+
+The old implementation extracted VLAN TCI info from the payload
+before the VLAN tag has been pushed in the payload.
+
+Another problem was that the VLAN TCI was extracted even if the
+packet did not have VLAN protocol header.
+
+This resulted in invalid VLAN TCI and as a consequence a random
+queue was computed.
+
+This patch fixes the above issues and use the VLAN TCI from the
+skb if it is present or VLAN TCI from payload if present. If no
+VLAN header is present queue 0 is selected.
+
+Fixes: 52c4a1a85f4b ("net: fec: add ndo_select_queue to fix TX bandwidth fluctuations")
+Signed-off-by: Radu Bulie <radu-andrei.bulie@nxp.com>
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 27 +++++++++--------------
+ 1 file changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 77c8e9cfb4456..35c95f07fd6d7 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -3710,31 +3710,26 @@ static int fec_set_features(struct net_device *netdev,
+       return 0;
+ }
+-static u16 fec_enet_get_raw_vlan_tci(struct sk_buff *skb)
+-{
+-      struct vlan_ethhdr *vhdr;
+-      unsigned short vlan_TCI = 0;
+-
+-      if (skb->protocol == htons(ETH_P_ALL)) {
+-              vhdr = (struct vlan_ethhdr *)(skb->data);
+-              vlan_TCI = ntohs(vhdr->h_vlan_TCI);
+-      }
+-
+-      return vlan_TCI;
+-}
+-
+ static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb,
+                                struct net_device *sb_dev)
+ {
+       struct fec_enet_private *fep = netdev_priv(ndev);
+-      u16 vlan_tag;
++      u16 vlan_tag = 0;
+       if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
+               return netdev_pick_tx(ndev, skb, NULL);
+-      vlan_tag = fec_enet_get_raw_vlan_tci(skb);
+-      if (!vlan_tag)
++      /* VLAN is present in the payload.*/
++      if (eth_type_vlan(skb->protocol)) {
++              struct vlan_ethhdr *vhdr = skb_vlan_eth_hdr(skb);
++
++              vlan_tag = ntohs(vhdr->h_vlan_TCI);
++      /*  VLAN is present in the skb but not yet pushed in the payload.*/
++      } else if (skb_vlan_tag_present(skb)) {
++              vlan_tag = skb->vlan_tci;
++      } else {
+               return vlan_tag;
++      }
+       return fec_enet_vlan_pri_to_queue[vlan_tag >> 13];
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch b/queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch
new file mode 100644 (file)
index 0000000..8e8fc12
--- /dev/null
@@ -0,0 +1,114 @@
+From 224c00961ad5e46c6de114ac0a8a267608c0d875 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Dec 2023 09:36:12 -0800
+Subject: net: ipv6: support reporting otherwise unknown prefix flags in
+ RTM_NEWPREFIX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Å»enczykowski <maze@google.com>
+
+[ Upstream commit bd4a816752bab609dd6d65ae021387beb9e2ddbd ]
+
+Lorenzo points out that we effectively clear all unknown
+flags from PIO when copying them to userspace in the netlink
+RTM_NEWPREFIX notification.
+
+We could fix this one at a time as new flags are defined,
+or in one fell swoop - I choose the latter.
+
+We could either define 6 new reserved flags (reserved1..6) and handle
+them individually (and rename them as new flags are defined), or we
+could simply copy the entire unmodified byte over - I choose the latter.
+
+This unfortunately requires some anonymous union/struct magic,
+so we add a static assert on the struct size for a little extra safety.
+
+Cc: David Ahern <dsahern@kernel.org>
+Cc: Lorenzo Colitti <lorenzo@google.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Maciej Å»enczykowski <maze@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/addrconf.h | 12 ++++++++++--
+ include/net/if_inet6.h |  4 ----
+ net/ipv6/addrconf.c    |  6 +-----
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index 82da55101b5a3..61ebe723ee4d5 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -31,17 +31,22 @@ struct prefix_info {
+       __u8                    length;
+       __u8                    prefix_len;
++      union __packed {
++              __u8            flags;
++              struct __packed {
+ #if defined(__BIG_ENDIAN_BITFIELD)
+-      __u8                    onlink : 1,
++                      __u8    onlink : 1,
+                               autoconf : 1,
+                               reserved : 6;
+ #elif defined(__LITTLE_ENDIAN_BITFIELD)
+-      __u8                    reserved : 6,
++                      __u8    reserved : 6,
+                               autoconf : 1,
+                               onlink : 1;
+ #else
+ #error "Please fix <asm/byteorder.h>"
+ #endif
++              };
++      };
+       __be32                  valid;
+       __be32                  prefered;
+       __be32                  reserved2;
+@@ -49,6 +54,9 @@ struct prefix_info {
+       struct in6_addr         prefix;
+ };
++/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */
++static_assert(sizeof(struct prefix_info) == 32);
++
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
+ #include <net/if_inet6.h>
+diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
+index c8490729b4aea..31bf475eca762 100644
+--- a/include/net/if_inet6.h
++++ b/include/net/if_inet6.h
+@@ -22,10 +22,6 @@
+ #define IF_RS_SENT    0x10
+ #define IF_READY      0x80000000
+-/* prefix flags */
+-#define IF_PREFIX_ONLINK      0x01
+-#define IF_PREFIX_AUTOCONF    0x02
+-
+ enum {
+       INET6_IFADDR_STATE_PREDAD,
+       INET6_IFADDR_STATE_DAD,
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 0b6ee962c84e2..b007d098ffe2e 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -6137,11 +6137,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
+       pmsg->prefix_len = pinfo->prefix_len;
+       pmsg->prefix_type = pinfo->type;
+       pmsg->prefix_pad3 = 0;
+-      pmsg->prefix_flags = 0;
+-      if (pinfo->onlink)
+-              pmsg->prefix_flags |= IF_PREFIX_ONLINK;
+-      if (pinfo->autoconf)
+-              pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
++      pmsg->prefix_flags = pinfo->flags;
+       if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix))
+               goto nla_put_failure;
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch b/queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch
new file mode 100644 (file)
index 0000000..0de6e38
--- /dev/null
@@ -0,0 +1,37 @@
+From dfd0684487af5bfaa34f573a541c82daec834b17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Nov 2023 09:36:20 +0300
+Subject: net/mlx5: Fix a NULL vs IS_ERR() check
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit ca4ef28d0ad831d2521fa2b16952f37fd9324ca3 ]
+
+The mlx5_esw_offloads_devlink_port() function returns error pointers, not
+NULL.
+
+Fixes: 7bef147a6ab6 ("net/mlx5: Don't skip vport check")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 825f9c687633f..007cb167cabc9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -1503,7 +1503,7 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+       dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch,
+                                                rpriv->rep->vport);
+-      if (dl_port) {
++      if (!IS_ERR(dl_port)) {
+               SET_NETDEV_DEVLINK_PORT(netdev, dl_port);
+               mlx5e_rep_vnic_reporter_create(priv, dl_port);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch b/queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch
new file mode 100644 (file)
index 0000000..acf0852
--- /dev/null
@@ -0,0 +1,73 @@
+From fabdb2abbb4c7d9a92c80d62340aa1aa66b3425f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Aug 2023 13:11:32 +0300
+Subject: net/mlx5: Nack sync reset request when HotPlug is enabled
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit 3d7a3f2612d75de5f371a681038b089ded6667eb ]
+
+Current sync reset flow is not supported when PCIe bridge connected
+directly to mlx5 device has HotPlug interrupt enabled and can be
+triggered on link state change event. Return nack on reset request in
+such case.
+
+Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/fw_reset.c    | 29 +++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+index b568988e92e3e..c4e19d627da21 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+@@ -325,6 +325,29 @@ static void mlx5_fw_live_patch_event(struct work_struct *work)
+               mlx5_core_err(dev, "Failed to reload FW tracer\n");
+ }
++#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
++static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev)
++{
++      struct pci_dev *bridge = dev->pdev->bus->self;
++      u16 reg16;
++      int err;
++
++      if (!bridge)
++              return -EOPNOTSUPP;
++
++      err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &reg16);
++      if (err)
++              return err;
++
++      if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) {
++              mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n");
++              return -EOPNOTSUPP;
++      }
++
++      return 0;
++}
++#endif
++
+ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id)
+ {
+       struct pci_bus *bridge_bus = dev->pdev->bus;
+@@ -357,6 +380,12 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev)
+               return false;
+       }
++#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
++      err = mlx5_check_hotplug_interrupt(dev);
++      if (err)
++              return false;
++#endif
++
+       err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+       if (err)
+               return false;
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch b/queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch
new file mode 100644 (file)
index 0000000..680b1a0
--- /dev/null
@@ -0,0 +1,59 @@
+From 2312b6ae40d5f24501806cad35104b9ed9a8ce17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 05:47:09 +0300
+Subject: net/mlx5e: Check netdev pointer before checking its net ns
+
+From: Gavin Li <gavinl@nvidia.com>
+
+[ Upstream commit 7aaf975238c47b710fcc4eca0da1e7902a53abe2 ]
+
+Previously, when comparing the net namespaces, the case where the netdev
+doesn't exist wasn't taken into account, and therefore can cause a crash.
+In such a case, the comparing function should return false, as there is no
+netdev->net to compare the devlink->net to.
+
+Furthermore, this will result in an attempt to enter switchdev mode
+without a netdev to fail, and which is the desired result as there is no
+meaning in switchdev mode without a net device.
+
+Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency")
+Signed-off-by: Gavin Li <gavinl@nvidia.com>
+Reviewed-by: Gavi Teitz <gavi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/eswitch_offloads.c        | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index bf78eeca401be..bb8bcb448ae90 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -3653,14 +3653,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+ static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink)
+ {
++      struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct net *devl_net, *netdev_net;
+-      struct mlx5_eswitch *esw;
+-
+-      esw = mlx5_devlink_eswitch_nocheck_get(devlink);
+-      netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev);
+-      devl_net = devlink_net(devlink);
++      bool ret = false;
+-      return net_eq(devl_net, netdev_net);
++      mutex_lock(&dev->mlx5e_res.uplink_netdev_lock);
++      if (dev->mlx5e_res.uplink_netdev) {
++              netdev_net = dev_net(dev->mlx5e_res.uplink_netdev);
++              devl_net = devlink_net(devlink);
++              ret = net_eq(devl_net, netdev_net);
++      }
++      mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock);
++      return ret;
+ }
+ int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev)
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch b/queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch
new file mode 100644 (file)
index 0000000..3516564
--- /dev/null
@@ -0,0 +1,43 @@
+From 54a4bf1895dd8835e3563dd06381c3e54887ef4f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Oct 2023 02:00:44 +0000
+Subject: net/mlx5e: Check the number of elements before walk TC rhashtable
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit 4e25b661f484df54b6751b65f9ea2434a3b67539 ]
+
+After IPSec TX tables are destroyed, the flow rules in TC rhashtable,
+which have the destination to IPSec, are restored to the original
+one, the uplink.
+
+However, when the device is in switchdev mode and unload driver with
+IPSec rules configured, TC rhashtable cleanup is done before IPSec
+cleanup, which means tc_ht->tbl is already freed when walking TC
+rhashtable, in order to restore the destination. So add the checking
+before walking to avoid unexpected behavior.
+
+Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+index 13b5916b64e22..d5d33c3b3aa2a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+@@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev)
+       xa_for_each(&esw->offloads.vport_reps, i, rep) {
+               rpriv = rep->rep_data[REP_ETH].priv;
+-              if (!rpriv || !rpriv->netdev)
++              if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems))
+                       continue;
+               rhashtable_walk_enter(&rpriv->tc_ht, &iter);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch b/queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch
new file mode 100644 (file)
index 0000000..203b4ac
--- /dev/null
@@ -0,0 +1,120 @@
+From 74a6e56e6d64f1bafa2c717ba3fb16030bb0201c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Oct 2023 15:44:47 +0200
+Subject: net/mlx5e: Disable IPsec offload support if not FW steering
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 762a55a54eec4217e4cec9265ab6e5d4c11b61bd ]
+
+IPsec FDB offload can only work with FW steering as of now,
+disable the cap upon non FW steering.
+
+And since the IPSec cap is dynamic now based on steering mode.
+Cleanup the resources if they exist instead of checking the
+IPsec cap again.
+
+Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec.c       | 26 ++++++++-----------
+ .../mlx5/core/en_accel/ipsec_offload.c        |  8 +++++-
+ 2 files changed, 18 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 0d4b8aef6adda..5834e47e72d82 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -929,9 +929,11 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+               return;
+       mlx5e_accel_ipsec_fs_cleanup(ipsec);
+-      if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
++      if (ipsec->netevent_nb.notifier_call) {
+               unregister_netevent_notifier(&ipsec->netevent_nb);
+-      if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
++              ipsec->netevent_nb.notifier_call = NULL;
++      }
++      if (ipsec->aso)
+               mlx5e_ipsec_aso_cleanup(ipsec);
+       destroy_workqueue(ipsec->wq);
+       kfree(ipsec);
+@@ -1040,6 +1042,12 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
+               }
+       }
++      if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
++          !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
++              NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
++              return -EINVAL;
++      }
++
+       return 0;
+ }
+@@ -1135,14 +1143,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+       .xdo_dev_state_free     = mlx5e_xfrm_free_state,
+       .xdo_dev_offload_ok     = mlx5e_ipsec_offload_ok,
+       .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+-};
+-
+-static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = {
+-      .xdo_dev_state_add      = mlx5e_xfrm_add_state,
+-      .xdo_dev_state_delete   = mlx5e_xfrm_del_state,
+-      .xdo_dev_state_free     = mlx5e_xfrm_free_state,
+-      .xdo_dev_offload_ok     = mlx5e_ipsec_offload_ok,
+-      .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+       .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft,
+       .xdo_dev_policy_add = mlx5e_xfrm_add_policy,
+@@ -1160,11 +1160,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+       mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+-      if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
+-              netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops;
+-      else
+-              netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+-
++      netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+       netdev->features |= NETIF_F_HW_ESP;
+       netdev->hw_enc_features |= NETIF_F_HW_ESP;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+index 55b11d8cba532..ce29e31721208 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+@@ -5,6 +5,8 @@
+ #include "en.h"
+ #include "ipsec.h"
+ #include "lib/crypto.h"
++#include "fs_core.h"
++#include "eswitch.h"
+ enum {
+       MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET,
+@@ -37,7 +39,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+           MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
+               caps |= MLX5_IPSEC_CAP_CRYPTO;
+-      if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) {
++      if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) &&
++          (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS ||
++           (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS &&
++           is_mdev_legacy_mode(mdev)))) {
+               if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+                                             reformat_add_esp_trasport) &&
+                   MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+@@ -558,6 +563,7 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec)
+       dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx),
+                        DMA_BIDIRECTIONAL);
+       kfree(aso);
++      ipsec->aso = NULL;
+ }
+ static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl,
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch b/queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch
new file mode 100644 (file)
index 0000000..9b39e13
--- /dev/null
@@ -0,0 +1,49 @@
+From be504b9847c84e1e28fddeda96043f5831198060 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Sep 2023 10:07:13 +0300
+Subject: net/mlx5e: Ensure that IPsec sequence packet number starts from 1
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 3d42c8cc67a8fcbff0181f9ed6d03d353edcee07 ]
+
+According to RFC4303, section "3.3.3. Sequence Number Generation",
+the first packet sent using a given SA will contain a sequence
+number of 1.
+
+However if user didn't set seq/oseq, the HW used zero as first sequence
+packet number. Such misconfiguration causes to drop of first packet
+if replay window protection was enabled in SA.
+
+To fix it, set sequence number to be at least 1.
+
+Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 65678e89aea62..0d4b8aef6adda 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -121,7 +121,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+       if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
+               esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
+-      sa_entry->esn_state.esn = esn;
++      if (sa_entry->esn_state.esn_msb)
++              sa_entry->esn_state.esn = esn;
++      else
++              /* According to RFC4303, section "3.3.3. Sequence Number Generation",
++               * the first packet sent using a given SA will contain a sequence
++               * number of 1.
++               */
++              sa_entry->esn_state.esn = max_t(u32, esn, 1);
+       sa_entry->esn_state.esn_msb = esn_msb;
+       if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch b/queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch
new file mode 100644 (file)
index 0000000..8eeb977
--- /dev/null
@@ -0,0 +1,243 @@
+From d511e13f216a152d7726e44e4a1a7f40baf85ca5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Sep 2022 18:45:11 +0300
+Subject: net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit eab0da38912ebdad922ed0388209f7eb0a5163cd ]
+
+Due to the cited patch, devlink health commands take devlink lock and
+this may result in deadlock for mlx5e_tx_reporter as it takes local
+state_lock before calling devlink health report and on the other hand
+devlink health commands such as diagnose for same reporter take local
+state_lock after taking devlink lock (see kernel log below).
+
+To fix it, remove local state_lock from mlx5e_tx_timeout_work() before
+calling devlink_health_report() and take care to cancel the work before
+any call to close channels, which may free the SQs that should be
+handled by the work. Before cancel_work_sync(), use current_work() to
+check we are not calling it from within the work, as
+mlx5e_tx_timeout_work() itself may close the channels and reopen as part
+of recovery flow.
+
+While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to
+ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock()
+and a flag to avoid deadlock by calling cancel_work_sync() before
+closing the channels while holding rtnl_lock too.
+
+Kernel log:
+======================================================
+WARNING: possible circular locking dependency detected
+6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted
+------------------------------------------------------
+kworker/u16:2/65 is trying to acquire lock:
+ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0
+
+but task is already holding lock:
+ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core]
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (&priv->state_lock){+.+.}-{3:3}:
+       __mutex_lock+0x12c/0x14b0
+       mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core]
+       devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50
+       genl_family_rcv_msg_doit+0x1e9/0x2f0
+       genl_rcv_msg+0x2e9/0x530
+       netlink_rcv_skb+0x11d/0x340
+       genl_rcv+0x24/0x40
+       netlink_unicast+0x438/0x710
+       netlink_sendmsg+0x788/0xc40
+       sock_sendmsg+0xb0/0xe0
+       __sys_sendto+0x1c1/0x290
+       __x64_sys_sendto+0xdd/0x1b0
+       do_syscall_64+0x3d/0x90
+       entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+-> #0 (&devlink->lock_key#2){+.+.}-{3:3}:
+       __lock_acquire+0x2c8a/0x6200
+       lock_acquire+0x1c1/0x550
+       __mutex_lock+0x12c/0x14b0
+       devlink_health_report+0x2f1/0x7e0
+       mlx5e_health_report+0xc9/0xd7 [mlx5_core]
+       mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core]
+       mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core]
+       process_one_work+0x7c2/0x1340
+       worker_thread+0x59d/0xec0
+       kthread+0x28f/0x330
+       ret_from_fork+0x1f/0x30
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&priv->state_lock);
+                               lock(&devlink->lock_key#2);
+                               lock(&priv->state_lock);
+  lock(&devlink->lock_key#2);
+
+ *** DEADLOCK ***
+
+4 locks held by kworker/u16:2/65:
+ #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340
+ #1: ffff888101de7db8 ((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340
+ #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core]
+ #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core]
+
+stack backtrace:
+CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core]
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x57/0x7d
+ check_noncircular+0x278/0x300
+ ? print_circular_bug+0x460/0x460
+ ? find_held_lock+0x2d/0x110
+ ? __stack_depot_save+0x24c/0x520
+ ? alloc_chain_hlocks+0x228/0x700
+ __lock_acquire+0x2c8a/0x6200
+ ? register_lock_class+0x1860/0x1860
+ ? kasan_save_stack+0x1e/0x40
+ ? kasan_set_free_info+0x20/0x30
+ ? ____kasan_slab_free+0x11d/0x1b0
+ ? kfree+0x1ba/0x520
+ ? devlink_health_do_dump.part.0+0x171/0x3a0
+ ? devlink_health_report+0x3d5/0x7e0
+ lock_acquire+0x1c1/0x550
+ ? devlink_health_report+0x2f1/0x7e0
+ ? lockdep_hardirqs_on_prepare+0x400/0x400
+ ? find_held_lock+0x2d/0x110
+ __mutex_lock+0x12c/0x14b0
+ ? devlink_health_report+0x2f1/0x7e0
+ ? devlink_health_report+0x2f1/0x7e0
+ ? mutex_lock_io_nested+0x1320/0x1320
+ ? trace_hardirqs_on+0x2d/0x100
+ ? bit_wait_io_timeout+0x170/0x170
+ ? devlink_health_do_dump.part.0+0x171/0x3a0
+ ? kfree+0x1ba/0x520
+ ? devlink_health_do_dump.part.0+0x171/0x3a0
+ devlink_health_report+0x2f1/0x7e0
+ mlx5e_health_report+0xc9/0xd7 [mlx5_core]
+ mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core]
+ ? lockdep_hardirqs_on_prepare+0x400/0x400
+ ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core]
+ ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core]
+ ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core]
+ ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core]
+ ? mutex_lock_io_nested+0x1320/0x1320
+ ? process_one_work+0x70f/0x1340
+ ? lockdep_hardirqs_on_prepare+0x400/0x400
+ ? lock_downgrade+0x6e0/0x6e0
+ mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core]
+ process_one_work+0x7c2/0x1340
+ ? lockdep_hardirqs_on_prepare+0x400/0x400
+ ? pwq_dec_nr_in_flight+0x230/0x230
+ ? rwlock_bug.part.0+0x90/0x90
+ worker_thread+0x59d/0xec0
+ ? process_one_work+0x1340/0x1340
+ kthread+0x28f/0x330
+ ? kthread_complete_and_exit+0x20/0x20
+ ret_from_fork+0x1f/0x30
+ </TASK>
+
+Fixes: c90005b5f75c ("devlink: Hold the instance lock in health callbacks")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
+ .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++++++---
+ 2 files changed, 25 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index 86f2690c5e015..20a6bc1a234f4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -818,6 +818,7 @@ enum {
+       MLX5E_STATE_DESTROYING,
+       MLX5E_STATE_XDP_TX_ENABLED,
+       MLX5E_STATE_XDP_ACTIVE,
++      MLX5E_STATE_CHANNELS_ACTIVE,
+ };
+ struct mlx5e_modify_sq_param {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index acb40770cf0cf..c3961c2bbc57c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -2668,6 +2668,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
+ {
+       int i;
++      ASSERT_RTNL();
+       if (chs->ptp) {
+               mlx5e_ptp_close(chs->ptp);
+               chs->ptp = NULL;
+@@ -2945,17 +2946,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
+       if (mlx5e_is_vport_rep(priv))
+               mlx5e_rep_activate_channels(priv);
++      set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
++
+       mlx5e_wait_channels_min_rx_wqes(&priv->channels);
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
+ }
++static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv)
++{
++      WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state));
++      if (current_work() != &priv->tx_timeout_work)
++              cancel_work_sync(&priv->tx_timeout_work);
++}
++
+ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
+ {
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_deactivate(priv->rx_res);
++      clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state);
++      mlx5e_cancel_tx_timeout_work(priv);
++
+       if (mlx5e_is_vport_rep(priv))
+               mlx5e_rep_deactivate_channels(priv);
+@@ -4734,8 +4747,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
+       struct net_device *netdev = priv->netdev;
+       int i;
+-      rtnl_lock();
+-      mutex_lock(&priv->state_lock);
++      /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues
++       * through this flow. However, channel closing flows have to wait for
++       * this work to finish while holding rtnl lock too. So either get the
++       * lock or find that channels are being closed for other reason and
++       * this work is not relevant anymore.
++       */
++      while (!rtnl_trylock()) {
++              if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
++                      return;
++              msleep(20);
++      }
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+               goto unlock;
+@@ -4754,7 +4776,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
+       }
+ unlock:
+-      mutex_unlock(&priv->state_lock);
+       rtnl_unlock();
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch b/queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch
new file mode 100644 (file)
index 0000000..104eedd
--- /dev/null
@@ -0,0 +1,89 @@
+From c93e2a63f590732fff03704c9588e975b17a5255 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Aug 2023 20:58:56 +0300
+Subject: net/mlx5e: Honor user choice of IPsec replay window size
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit a5e400a985df8041ed4659ed1462aa9134318130 ]
+
+Users can configure IPsec replay window size, but mlx5 driver didn't
+honor their choice and set always 32bits. Fix assignment logic to
+configure right size from the beginning.
+
+Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers")
+Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec.c       | 21 +++++++++++++++++++
+ .../mlx5/core/en_accel/ipsec_offload.c        |  2 +-
+ include/linux/mlx5/mlx5_ifc.h                 |  7 +++++++
+ 3 files changed, 29 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 7d4ceb9b9c16f..65678e89aea62 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -335,6 +335,27 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+               attrs->replay_esn.esn = sa_entry->esn_state.esn;
+               attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
+               attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
++              switch (x->replay_esn->replay_window) {
++              case 32:
++                      attrs->replay_esn.replay_window =
++                              MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
++                      break;
++              case 64:
++                      attrs->replay_esn.replay_window =
++                              MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
++                      break;
++              case 128:
++                      attrs->replay_esn.replay_window =
++                              MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
++                      break;
++              case 256:
++                      attrs->replay_esn.replay_window =
++                              MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
++                      break;
++              default:
++                      WARN_ON(true);
++                      return;
++              }
+       }
+       attrs->dir = x->xso.dir;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+index 3245d1c9d5392..55b11d8cba532 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+@@ -94,7 +94,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
+               if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
+                       MLX5_SET(ipsec_aso, aso_ctx, window_sz,
+-                               attrs->replay_esn.replay_window / 64);
++                               attrs->replay_esn.replay_window);
+                       MLX5_SET(ipsec_aso, aso_ctx, mode,
+                                MLX5_IPSEC_ASO_REPLAY_PROTECTION);
+               }
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index fc3db401f8a28..f08cd13031458 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -11936,6 +11936,13 @@ enum {
+       MLX5_IPSEC_ASO_INC_SN            = 0x2,
+ };
++enum {
++      MLX5_IPSEC_ASO_REPLAY_WIN_32BIT  = 0x0,
++      MLX5_IPSEC_ASO_REPLAY_WIN_64BIT  = 0x1,
++      MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2,
++      MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3,
++};
++
+ struct mlx5_ifc_ipsec_aso_bits {
+       u8         valid[0x1];
+       u8         reserved_at_201[0x1];
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch b/queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch
new file mode 100644 (file)
index 0000000..1438e89
--- /dev/null
@@ -0,0 +1,305 @@
+From 1fe0f5c3938ea965d4f1cdf2de0f223a13e63099 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 03:38:29 +0000
+Subject: net/mlx5e: Reduce eswitch mode_lock protection context
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit baac8351f74c543896b8fd40138b7ad9365587a3 ]
+
+Currently eswitch mode_lock is so heavy, for example, it's locked
+during the whole process of the mode change, which may need to hold
+other locks. As the mode_lock is also used by IPSec to block mode and
+encap change now, it is easy to cause lock dependency.
+
+Since some of protections are also done by devlink lock, the eswitch
+mode_lock is not needed at those places, and thus the possibility of
+lockdep issue is reduced.
+
+Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec_fs.c    |  9 +++--
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++-------
+ .../net/ethernet/mellanox/mlx5/core/eswitch.h |  2 +
+ .../mellanox/mlx5/core/eswitch_offloads.c     | 38 +++++++++++--------
+ 4 files changed, 52 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index 6dc60be2a697c..03f69c485a006 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -1834,8 +1834,11 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev)
+       struct mlx5_eswitch *esw = mdev->priv.eswitch;
+       int err = 0;
+-      if (esw)
+-              down_write(&esw->mode_lock);
++      if (esw) {
++              err = mlx5_esw_lock(esw);
++              if (err)
++                      return err;
++      }
+       if (mdev->num_block_ipsec) {
+               err = -EBUSY;
+@@ -1846,7 +1849,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev)
+ unlock:
+       if (esw)
+-              up_write(&esw->mode_lock);
++              mlx5_esw_unlock(esw);
+       return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 8d0b915a31214..3047d7015c525 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1463,7 +1463,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
+ {
+       int err;
+-      lockdep_assert_held(&esw->mode_lock);
++      devl_assert_locked(priv_to_devlink(esw->dev));
+       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+               esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
+@@ -1531,7 +1531,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+       if (toggle_lag)
+               mlx5_lag_disable_change(esw->dev);
+-      down_write(&esw->mode_lock);
+       if (!mlx5_esw_is_fdb_created(esw)) {
+               ret = mlx5_eswitch_enable_locked(esw, num_vfs);
+       } else {
+@@ -1554,8 +1553,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
+               }
+       }
+-      up_write(&esw->mode_lock);
+-
+       if (toggle_lag)
+               mlx5_lag_enable_change(esw->dev);
+@@ -1569,12 +1566,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
+               return;
+       devl_assert_locked(priv_to_devlink(esw->dev));
+-      down_write(&esw->mode_lock);
+       /* If driver is unloaded, this function is called twice by remove_one()
+        * and mlx5_unload(). Prevent the second call.
+        */
+       if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf)
+-              goto unlock;
++              return;
+       esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n",
+                esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+@@ -1603,9 +1599,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
+               esw->esw_funcs.num_vfs = 0;
+       else
+               esw->esw_funcs.num_ec_vfs = 0;
+-
+-unlock:
+-      up_write(&esw->mode_lock);
+ }
+ /* Free resources for corresponding eswitch mode. It is called by devlink
+@@ -1647,10 +1640,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
+       devl_assert_locked(priv_to_devlink(esw->dev));
+       mlx5_lag_disable_change(esw->dev);
+-      down_write(&esw->mode_lock);
+       mlx5_eswitch_disable_locked(esw);
+       esw->mode = MLX5_ESWITCH_LEGACY;
+-      up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
+ }
+@@ -2254,8 +2245,13 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
+       if (!mlx5_esw_allowed(esw))
+               return true;
+-      if (down_read_trylock(&esw->mode_lock) != 0)
++      if (down_read_trylock(&esw->mode_lock) != 0) {
++              if (esw->eswitch_operation_in_progress) {
++                      up_read(&esw->mode_lock);
++                      return false;
++              }
+               return true;
++      }
+       return false;
+ }
+@@ -2312,7 +2308,8 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+       if (down_write_trylock(&esw->mode_lock) == 0)
+               return -EINVAL;
+-      if (atomic64_read(&esw->user_count) > 0) {
++      if (esw->eswitch_operation_in_progress ||
++          atomic64_read(&esw->user_count) > 0) {
+               up_write(&esw->mode_lock);
+               return -EBUSY;
+       }
+@@ -2320,6 +2317,18 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+       return esw->mode;
+ }
++int mlx5_esw_lock(struct mlx5_eswitch *esw)
++{
++      down_write(&esw->mode_lock);
++
++      if (esw->eswitch_operation_in_progress) {
++              up_write(&esw->mode_lock);
++              return -EBUSY;
++      }
++
++      return 0;
++}
++
+ /**
+  * mlx5_esw_unlock() - Release write lock on esw mode lock
+  * @esw: eswitch device.
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index 37ab66e7b403f..b674b57d05aad 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -383,6 +383,7 @@ struct mlx5_eswitch {
+       struct xarray paired;
+       struct mlx5_devcom_comp_dev *devcom;
+       u16 enabled_ipsec_vf_count;
++      bool eswitch_operation_in_progress;
+ };
+ void esw_offloads_disable(struct mlx5_eswitch *esw);
+@@ -827,6 +828,7 @@ void mlx5_esw_release(struct mlx5_core_dev *dev);
+ void mlx5_esw_get(struct mlx5_core_dev *dev);
+ void mlx5_esw_put(struct mlx5_core_dev *dev);
+ int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
++int mlx5_esw_lock(struct mlx5_eswitch *esw);
+ void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+ void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 88236e75fd901..bf78eeca401be 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -3733,13 +3733,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+               goto unlock;
+       }
++      esw->eswitch_operation_in_progress = true;
++      up_write(&esw->mode_lock);
++
+       mlx5_eswitch_disable_locked(esw);
+       if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
+               if (mlx5_devlink_trap_get_num_active(esw->dev)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Can't change mode while devlink traps are active");
+                       err = -EOPNOTSUPP;
+-                      goto unlock;
++                      goto skip;
+               }
+               err = esw_offloads_start(esw, extack);
+       } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
+@@ -3749,6 +3752,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+               err = -EINVAL;
+       }
++skip:
++      down_write(&esw->mode_lock);
++      esw->eswitch_operation_in_progress = false;
+ unlock:
+       mlx5_esw_unlock(esw);
+ enable_lag:
+@@ -3759,16 +3765,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+ {
+       struct mlx5_eswitch *esw;
+-      int err;
+       esw = mlx5_devlink_eswitch_get(devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+-      down_read(&esw->mode_lock);
+-      err = esw_mode_to_devlink(esw->mode, mode);
+-      up_read(&esw->mode_lock);
+-      return err;
++      return esw_mode_to_devlink(esw->mode, mode);
+ }
+ static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+@@ -3862,11 +3864,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+       if (err)
+               goto out;
++      esw->eswitch_operation_in_progress = true;
++      up_write(&esw->mode_lock);
++
+       err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+-      if (err)
+-              goto out;
++      if (!err)
++              esw->offloads.inline_mode = mlx5_mode;
+-      esw->offloads.inline_mode = mlx5_mode;
++      down_write(&esw->mode_lock);
++      esw->eswitch_operation_in_progress = false;
+       up_write(&esw->mode_lock);
+       return 0;
+@@ -3878,16 +3884,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+ {
+       struct mlx5_eswitch *esw;
+-      int err;
+       esw = mlx5_devlink_eswitch_get(devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+-      down_read(&esw->mode_lock);
+-      err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+-      up_read(&esw->mode_lock);
+-      return err;
++      return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+ }
+ bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+@@ -3969,6 +3971,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+               goto unlock;
+       }
++      esw->eswitch_operation_in_progress = true;
++      up_write(&esw->mode_lock);
++
+       esw_destroy_offloads_fdb_tables(esw);
+       esw->offloads.encap = encap;
+@@ -3982,6 +3987,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+               (void)esw_create_offloads_fdb_tables(esw);
+       }
++      down_write(&esw->mode_lock);
++      esw->eswitch_operation_in_progress = false;
++
+ unlock:
+       up_write(&esw->mode_lock);
+       return err;
+@@ -3996,9 +4004,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+-      down_read(&esw->mode_lock);
+       *encap = esw->offloads.encap;
+-      up_read(&esw->mode_lock);
+       return 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch b/queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch
new file mode 100644 (file)
index 0000000..5252e0d
--- /dev/null
@@ -0,0 +1,167 @@
+From f1f2a993678b755dadf802c9beb90aa5f7b88ebb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Sep 2023 13:28:10 +0300
+Subject: net/mlx5e: TC, Don't offload post action rule if not supported
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit ccbe33003b109f14c4dde2a4fca9c2a50c423601 ]
+
+If post action is not supported, eg. ignore_flow_level is not
+supported, don't offload post action rule. Otherwise, will hit
+panic [1].
+
+Fix it by checking if post action table is valid or not.
+
+[1]
+[445537.863880] BUG: unable to handle page fault for address: ffffffffffffffb1
+[445537.864617] #PF: supervisor read access in kernel mode
+[445537.865244] #PF: error_code(0x0000) - not-present page
+[445537.865860] PGD 70683a067 P4D 70683a067 PUD 70683c067 PMD 0
+[445537.866497] Oops: 0000 [#1] PREEMPT SMP NOPTI
+[445537.867077] CPU: 19 PID: 248742 Comm: tc Kdump: loaded Tainted: G           O       6.5.0+ #1
+[445537.867888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[445537.868834] RIP: 0010:mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core]
+[445537.869635] Code: c0 0d 00 00 e8 20 96 c6 d3 48 85 c0 0f 84 e5 00 00 00 c7 83 b0 01 00 00 00 00 00 00 49 89 c5 31 c0 31 d2 66 89 83 b4 01 00 00 <49> 8b 44 24 10 83 23 df 83 8b d8 01 00 00 04 48 89 83 c0 01 00 00
+[445537.871318] RSP: 0018:ffffb98741cef428 EFLAGS: 00010246
+[445537.871962] RAX: 0000000000000000 RBX: ffff8df341167000 RCX: 0000000000000001
+[445537.872704] RDX: 0000000000000000 RSI: ffffffff954844e1 RDI: ffffffff9546e9cb
+[445537.873430] RBP: ffffb98741cef448 R08: 0000000000000020 R09: 0000000000000246
+[445537.874160] R10: 0000000000000000 R11: ffffffff943f73ff R12: ffffffffffffffa1
+[445537.874893] R13: ffff8df36d336c20 R14: ffffffffffffffa1 R15: ffff8df341167000
+[445537.875628] FS:  00007fcd6564f800(0000) GS:ffff8dfa9ea00000(0000) knlGS:0000000000000000
+[445537.876425] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[445537.877090] CR2: ffffffffffffffb1 CR3: 00000003b5884001 CR4: 0000000000770ee0
+[445537.877832] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[445537.878564] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[445537.879300] PKRU: 55555554
+[445537.879797] Call Trace:
+[445537.880263]  <TASK>
+[445537.880713]  ? show_regs+0x6e/0x80
+[445537.881232]  ? __die+0x29/0x70
+[445537.881731]  ? page_fault_oops+0x85/0x160
+[445537.882276]  ? search_exception_tables+0x65/0x70
+[445537.882852]  ? kernelmode_fixup_or_oops+0xa2/0x120
+[445537.883432]  ? __bad_area_nosemaphore+0x18b/0x250
+[445537.884019]  ? bad_area_nosemaphore+0x16/0x20
+[445537.884566]  ? do_kern_addr_fault+0x8b/0xa0
+[445537.885105]  ? exc_page_fault+0xf5/0x1c0
+[445537.885623]  ? asm_exc_page_fault+0x2b/0x30
+[445537.886149]  ? __kmem_cache_alloc_node+0x1df/0x2a0
+[445537.886717]  ? mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core]
+[445537.887431]  ? mlx5e_tc_post_act_add+0x30/0x130 [mlx5_core]
+[445537.888172]  alloc_flow_post_acts+0xfb/0x1c0 [mlx5_core]
+[445537.888849]  parse_tc_actions+0x582/0x5c0 [mlx5_core]
+[445537.889505]  parse_tc_fdb_actions+0xd7/0x1f0 [mlx5_core]
+[445537.890175]  __mlx5e_add_fdb_flow+0x1ab/0x2b0 [mlx5_core]
+[445537.890843]  mlx5e_add_fdb_flow+0x56/0x120 [mlx5_core]
+[445537.891491]  ? debug_smp_processor_id+0x1b/0x30
+[445537.892037]  mlx5e_tc_add_flow+0x79/0x90 [mlx5_core]
+[445537.892676]  mlx5e_configure_flower+0x305/0x450 [mlx5_core]
+[445537.893341]  mlx5e_rep_setup_tc_cls_flower+0x3d/0x80 [mlx5_core]
+[445537.894037]  mlx5e_rep_setup_tc_cb+0x5c/0xa0 [mlx5_core]
+[445537.894693]  tc_setup_cb_add+0xdc/0x220
+[445537.895177]  fl_hw_replace_filter+0x15f/0x220 [cls_flower]
+[445537.895767]  fl_change+0xe87/0x1190 [cls_flower]
+[445537.896302]  tc_new_tfilter+0x484/0xa50
+
+Fixes: f0da4daa3413 ("net/mlx5e: Refactor ct to use post action infrastructure")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Automatic Verification <verifier@nvidia.com>
+Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Shachar Kagan <skagan@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en/tc/post_act.c       |  6 +++++
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 25 ++++++++++++++++---
+ 2 files changed, 27 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+index 4e923a2874aef..86bf007fd05b7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+@@ -83,6 +83,9 @@ mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+       struct mlx5_flow_spec *spec;
+       int err;
++      if (IS_ERR(post_act))
++              return PTR_ERR(post_act);
++
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+@@ -111,6 +114,9 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po
+       struct mlx5e_post_act_handle *handle;
+       int err;
++      if (IS_ERR(post_act))
++              return ERR_CAST(post_act);
++
+       handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+       if (!handle)
+               return ERR_PTR(-ENOMEM);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index b62fd37493410..1bead98f73bf5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -444,6 +444,9 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
+       struct mlx5e_flow_meter_handle *meter;
+       enum mlx5e_post_meter_type type;
++      if (IS_ERR(post_act))
++              return PTR_ERR(post_act);
++
+       meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
+       if (IS_ERR(meter)) {
+               mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
+@@ -3736,6 +3739,20 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
+       return err;
+ }
++static int
++set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr)
++{
++      struct mlx5e_post_act *post_act = get_post_action(priv);
++
++      if (IS_ERR(post_act))
++              return PTR_ERR(post_act);
++
++      attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
++      attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
++
++      return 0;
++}
++
+ static int
+ alloc_branch_attr(struct mlx5e_tc_flow *flow,
+                 struct mlx5e_tc_act_branch_ctrl *cond,
+@@ -3759,8 +3776,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow,
+               break;
+       case FLOW_ACTION_ACCEPT:
+       case FLOW_ACTION_PIPE:
+-              attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+-              attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
++              if (set_branch_dest_ft(flow->priv, attr))
++                      goto out_err;
+               break;
+       case FLOW_ACTION_JUMP:
+               if (*jump_count) {
+@@ -3769,8 +3786,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow,
+                       goto out_err;
+               }
+               *jump_count = cond->extval;
+-              attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+-              attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
++              if (set_branch_dest_ft(flow->priv, attr))
++                      goto out_err;
+               break;
+       default:
+               err = -EOPNOTSUPP;
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch b/queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch
new file mode 100644 (file)
index 0000000..f8cf573
--- /dev/null
@@ -0,0 +1,92 @@
+From a35fab7a2c0f6ff983e80dd38255f2699c25632f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 12 Nov 2023 13:50:00 +0200
+Subject: net/mlx5e: Tidy up IPsec NAT-T SA discovery
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit c2bf84f1d1a1595dcc45fe867f0e02b331993fee ]
+
+IPsec NAT-T packets are UDP encapsulated packets over ESP normal ones.
+In case they arrive to RX, the SPI and ESP are located in inner header,
+while the check was performed on outer header instead.
+
+That wrong check caused to the situation where received rekeying request
+was missed and caused to rekey timeout, which "compensated" this failure
+by completing rekeying.
+
+Fixes: d65954934937 ("net/mlx5e: Support IPsec NAT-T functionality")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 22 ++++++++++++++-----
+ include/linux/mlx5/mlx5_ifc.h                 |  2 +-
+ 2 files changed, 17 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index fc6aca7c05a48..6dc60be2a697c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -974,13 +974,22 @@ static void setup_fte_esp(struct mlx5_flow_spec *spec)
+       MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP);
+ }
+-static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi)
++static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap)
+ {
+       /* SPI number */
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+-      MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi);
+-      MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi);
++      if (encap) {
++              MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
++                               misc_parameters.inner_esp_spi);
++              MLX5_SET(fte_match_param, spec->match_value,
++                       misc_parameters.inner_esp_spi, spi);
++      } else {
++              MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
++                               misc_parameters.outer_esp_spi);
++              MLX5_SET(fte_match_param, spec->match_value,
++                       misc_parameters.outer_esp_spi, spi);
++      }
+ }
+ static void setup_fte_no_frags(struct mlx5_flow_spec *spec)
+@@ -1339,8 +1348,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
+       else
+               setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+-      setup_fte_spi(spec, attrs->spi);
+-      setup_fte_esp(spec);
++      setup_fte_spi(spec, attrs->spi, attrs->encap);
++      if (!attrs->encap)
++              setup_fte_esp(spec);
+       setup_fte_no_frags(spec);
+       setup_fte_upper_proto_match(spec, &attrs->upspec);
+@@ -1443,7 +1453,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
+       switch (attrs->type) {
+       case XFRM_DEV_OFFLOAD_CRYPTO:
+-              setup_fte_spi(spec, attrs->spi);
++              setup_fte_spi(spec, attrs->spi, false);
+               setup_fte_esp(spec);
+               setup_fte_reg_a(spec);
+               break;
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index f08cd13031458..8ac6ae79e0835 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -620,7 +620,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
+       u8         reserved_at_140[0x8];
+       u8         bth_dst_qp[0x18];
+-      u8         reserved_at_160[0x20];
++      u8         inner_esp_spi[0x20];
+       u8         outer_esp_spi[0x20];
+       u8         reserved_at_1a0[0x60];
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch b/queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch
new file mode 100644 (file)
index 0000000..d69f246
--- /dev/null
@@ -0,0 +1,461 @@
+From 9a5f6e2baf4ae34be44de27e1ea70124945d2d32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 14:06:18 +0300
+Subject: net/mlx5e: Unify esw and normal IPsec status table
+ creation/destruction
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit 94af50c0a9bb961fe93cf0fdd14eb0883da86721 ]
+
+Change normal IPsec flow to use the same creation/destruction functions
+for status flow table as that of ESW, which first of all refines the
+code to have less code duplication.
+
+And more importantly, the ESW status table handles IPsec syndrome
+checks at steering by HW, which is more efficient than the previous
+behaviour we had where it was copied to WQE meta data and checked
+by the driver.
+
+Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode")
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 187 +++++++++++++-----
+ .../mellanox/mlx5/core/esw/ipsec_fs.c         | 152 --------------
+ .../mellanox/mlx5/core/esw/ipsec_fs.h         |  15 --
+ 3 files changed, 141 insertions(+), 213 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index 7dba4221993f0..fc6aca7c05a48 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -128,63 +128,166 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
+       return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ }
+-static int ipsec_status_rule(struct mlx5_core_dev *mdev,
+-                           struct mlx5e_ipsec_rx *rx,
+-                           struct mlx5_flow_destination *dest)
++static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec,
++                                       struct mlx5e_ipsec_rx *rx)
+ {
+-      u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
++      mlx5_del_flow_rules(rx->status_drop.rule);
++      mlx5_destroy_flow_group(rx->status_drop.group);
++      mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt);
++}
++
++static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec,
++                                       struct mlx5e_ipsec_rx *rx)
++{
++      mlx5_del_flow_rules(rx->status.rule);
++
++      if (rx != ipsec->rx_esw)
++              return;
++
++#ifdef CONFIG_MLX5_ESWITCH
++      mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0);
++#endif
++}
++
++static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec,
++                                     struct mlx5e_ipsec_rx *rx)
++{
++      int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
++      struct mlx5_flow_table *ft = rx->ft.status;
++      struct mlx5_core_dev *mdev = ipsec->mdev;
++      struct mlx5_flow_destination dest = {};
+       struct mlx5_flow_act flow_act = {};
+-      struct mlx5_modify_hdr *modify_hdr;
+-      struct mlx5_flow_handle *fte;
++      struct mlx5_flow_handle *rule;
++      struct mlx5_fc *flow_counter;
+       struct mlx5_flow_spec *spec;
+-      int err;
++      struct mlx5_flow_group *g;
++      u32 *flow_group_in;
++      int err = 0;
++      flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+-      if (!spec)
+-              return -ENOMEM;
++      if (!flow_group_in || !spec) {
++              err = -ENOMEM;
++              goto err_out;
++      }
+-      /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */
+-      MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
+-      MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
+-      MLX5_SET(copy_action_in, action, src_offset, 0);
+-      MLX5_SET(copy_action_in, action, length, 7);
+-      MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
+-      MLX5_SET(copy_action_in, action, dst_offset, 24);
++      MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
++      MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
++      g = mlx5_create_flow_group(ft, flow_group_in);
++      if (IS_ERR(g)) {
++              err = PTR_ERR(g);
++              mlx5_core_err(mdev,
++                            "Failed to add ipsec rx status drop flow group, err=%d\n", err);
++              goto err_out;
++      }
+-      modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
+-                                            1, action);
++      flow_counter = mlx5_fc_create(mdev, false);
++      if (IS_ERR(flow_counter)) {
++              err = PTR_ERR(flow_counter);
++              mlx5_core_err(mdev,
++                            "Failed to add ipsec rx status drop rule counter, err=%d\n", err);
++              goto err_cnt;
++      }
+-      if (IS_ERR(modify_hdr)) {
+-              err = PTR_ERR(modify_hdr);
++      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
++      dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
++      dest.counter_id = mlx5_fc_id(flow_counter);
++      if (rx == ipsec->rx_esw)
++              spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
++      rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
++      if (IS_ERR(rule)) {
++              err = PTR_ERR(rule);
+               mlx5_core_err(mdev,
+-                            "fail to alloc ipsec copy modify_header_id err=%d\n", err);
+-              goto out_spec;
++                            "Failed to add ipsec rx status drop rule, err=%d\n", err);
++              goto err_rule;
+       }
+-      /* create fte */
+-      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+-                        MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
++      rx->status_drop.group = g;
++      rx->status_drop.rule = rule;
++      rx->status_drop_cnt = flow_counter;
++
++      kvfree(flow_group_in);
++      kvfree(spec);
++      return 0;
++
++err_rule:
++      mlx5_fc_destroy(mdev, flow_counter);
++err_cnt:
++      mlx5_destroy_flow_group(g);
++err_out:
++      kvfree(flow_group_in);
++      kvfree(spec);
++      return err;
++}
++
++static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec,
++                                     struct mlx5e_ipsec_rx *rx,
++                                     struct mlx5_flow_destination *dest)
++{
++      struct mlx5_flow_act flow_act = {};
++      struct mlx5_flow_handle *rule;
++      struct mlx5_flow_spec *spec;
++      int err;
++
++      spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
++      if (!spec)
++              return -ENOMEM;
++
++      MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
++                       misc_parameters_2.ipsec_syndrome);
++      MLX5_SET(fte_match_param, spec->match_value,
++               misc_parameters_2.ipsec_syndrome, 0);
++      if (rx == ipsec->rx_esw)
++              spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
++      spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
++      flow_act.flags = FLOW_ACT_NO_APPEND;
++      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
+-      flow_act.modify_hdr = modify_hdr;
+-      fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
+-      if (IS_ERR(fte)) {
+-              err = PTR_ERR(fte);
+-              mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err);
+-              goto out;
++      rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
++      if (IS_ERR(rule)) {
++              err = PTR_ERR(rule);
++              mlx5_core_warn(ipsec->mdev,
++                             "Failed to add ipsec rx status pass rule, err=%d\n", err);
++              goto err_rule;
+       }
++      rx->status.rule = rule;
+       kvfree(spec);
+-      rx->status.rule = fte;
+-      rx->status.modify_hdr = modify_hdr;
+       return 0;
+-out:
+-      mlx5_modify_header_dealloc(mdev, modify_hdr);
+-out_spec:
++err_rule:
+       kvfree(spec);
+       return err;
+ }
++static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
++                                       struct mlx5e_ipsec_rx *rx)
++{
++      ipsec_rx_status_pass_destroy(ipsec, rx);
++      ipsec_rx_status_drop_destroy(ipsec, rx);
++}
++
++static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
++                                     struct mlx5e_ipsec_rx *rx,
++                                     struct mlx5_flow_destination *dest)
++{
++      int err;
++
++      err = ipsec_rx_status_drop_create(ipsec, rx);
++      if (err)
++              return err;
++
++      err = ipsec_rx_status_pass_create(ipsec, rx, dest);
++      if (err)
++              goto err_pass_create;
++
++      return 0;
++
++err_pass_create:
++      ipsec_rx_status_drop_destroy(ipsec, rx);
++      return err;
++}
++
+ static int ipsec_miss_create(struct mlx5_core_dev *mdev,
+                            struct mlx5_flow_table *ft,
+                            struct mlx5e_ipsec_miss *miss,
+@@ -256,12 +359,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+       mlx5_destroy_flow_table(rx->ft.sa);
+       if (rx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
+-      if (rx == ipsec->rx_esw) {
+-              mlx5_esw_ipsec_rx_status_destroy(ipsec, rx);
+-      } else {
+-              mlx5_del_flow_rules(rx->status.rule);
+-              mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
+-      }
++      mlx5_ipsec_rx_status_destroy(ipsec, rx);
+       mlx5_destroy_flow_table(rx->ft.status);
+       mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family);
+@@ -351,10 +449,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+       dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+       dest[1].counter_id = mlx5_fc_id(rx->fc->cnt);
+-      if (rx == ipsec->rx_esw)
+-              err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest);
+-      else
+-              err = ipsec_status_rule(mdev, rx, dest);
++      err = mlx5_ipsec_rx_status_create(ipsec, rx, dest);
+       if (err)
+               goto err_add;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+index 095f31f380fa3..13b5916b64e22 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+@@ -21,158 +21,6 @@ enum {
+       MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL,
+ };
+-static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec,
+-                                           struct mlx5e_ipsec_rx *rx)
+-{
+-      mlx5_del_flow_rules(rx->status_drop.rule);
+-      mlx5_destroy_flow_group(rx->status_drop.group);
+-      mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt);
+-}
+-
+-static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec,
+-                                           struct mlx5e_ipsec_rx *rx)
+-{
+-      mlx5_del_flow_rules(rx->status.rule);
+-      mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0);
+-}
+-
+-static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec,
+-                                         struct mlx5e_ipsec_rx *rx)
+-{
+-      int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+-      struct mlx5_flow_table *ft = rx->ft.status;
+-      struct mlx5_core_dev *mdev = ipsec->mdev;
+-      struct mlx5_flow_destination dest = {};
+-      struct mlx5_flow_act flow_act = {};
+-      struct mlx5_flow_handle *rule;
+-      struct mlx5_fc *flow_counter;
+-      struct mlx5_flow_spec *spec;
+-      struct mlx5_flow_group *g;
+-      u32 *flow_group_in;
+-      int err = 0;
+-
+-      flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+-      spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+-      if (!flow_group_in || !spec) {
+-              err = -ENOMEM;
+-              goto err_out;
+-      }
+-
+-      MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
+-      MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
+-      g = mlx5_create_flow_group(ft, flow_group_in);
+-      if (IS_ERR(g)) {
+-              err = PTR_ERR(g);
+-              mlx5_core_err(mdev,
+-                            "Failed to add ipsec rx status drop flow group, err=%d\n", err);
+-              goto err_out;
+-      }
+-
+-      flow_counter = mlx5_fc_create(mdev, false);
+-      if (IS_ERR(flow_counter)) {
+-              err = PTR_ERR(flow_counter);
+-              mlx5_core_err(mdev,
+-                            "Failed to add ipsec rx status drop rule counter, err=%d\n", err);
+-              goto err_cnt;
+-      }
+-
+-      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
+-      dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+-      dest.counter_id = mlx5_fc_id(flow_counter);
+-      spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+-      rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+-      if (IS_ERR(rule)) {
+-              err = PTR_ERR(rule);
+-              mlx5_core_err(mdev,
+-                            "Failed to add ipsec rx status drop rule, err=%d\n", err);
+-              goto err_rule;
+-      }
+-
+-      rx->status_drop.group = g;
+-      rx->status_drop.rule = rule;
+-      rx->status_drop_cnt = flow_counter;
+-
+-      kvfree(flow_group_in);
+-      kvfree(spec);
+-      return 0;
+-
+-err_rule:
+-      mlx5_fc_destroy(mdev, flow_counter);
+-err_cnt:
+-      mlx5_destroy_flow_group(g);
+-err_out:
+-      kvfree(flow_group_in);
+-      kvfree(spec);
+-      return err;
+-}
+-
+-static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec,
+-                                         struct mlx5e_ipsec_rx *rx,
+-                                         struct mlx5_flow_destination *dest)
+-{
+-      struct mlx5_flow_act flow_act = {};
+-      struct mlx5_flow_handle *rule;
+-      struct mlx5_flow_spec *spec;
+-      int err;
+-
+-      spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+-      if (!spec)
+-              return -ENOMEM;
+-
+-      MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+-                       misc_parameters_2.ipsec_syndrome);
+-      MLX5_SET(fte_match_param, spec->match_value,
+-               misc_parameters_2.ipsec_syndrome, 0);
+-      spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+-      spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+-      flow_act.flags = FLOW_ACT_NO_APPEND;
+-      flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+-                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
+-      rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
+-      if (IS_ERR(rule)) {
+-              err = PTR_ERR(rule);
+-              mlx5_core_warn(ipsec->mdev,
+-                             "Failed to add ipsec rx status pass rule, err=%d\n", err);
+-              goto err_rule;
+-      }
+-
+-      rx->status.rule = rule;
+-      kvfree(spec);
+-      return 0;
+-
+-err_rule:
+-      kvfree(spec);
+-      return err;
+-}
+-
+-void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
+-                                    struct mlx5e_ipsec_rx *rx)
+-{
+-      esw_ipsec_rx_status_pass_destroy(ipsec, rx);
+-      esw_ipsec_rx_status_drop_destroy(ipsec, rx);
+-}
+-
+-int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
+-                                  struct mlx5e_ipsec_rx *rx,
+-                                  struct mlx5_flow_destination *dest)
+-{
+-      int err;
+-
+-      err = esw_ipsec_rx_status_drop_create(ipsec, rx);
+-      if (err)
+-              return err;
+-
+-      err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest);
+-      if (err)
+-              goto err_pass_create;
+-
+-      return 0;
+-
+-err_pass_create:
+-      esw_ipsec_rx_status_drop_destroy(ipsec, rx);
+-      return err;
+-}
+-
+ void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
+                                      struct mlx5e_ipsec_rx_create_attr *attr)
+ {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
+index 0c90f7a8b0d32..ac9c65b89166e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
+@@ -8,11 +8,6 @@ struct mlx5e_ipsec;
+ struct mlx5e_ipsec_sa_entry;
+ #ifdef CONFIG_MLX5_ESWITCH
+-void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
+-                                    struct mlx5e_ipsec_rx *rx);
+-int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
+-                                  struct mlx5e_ipsec_rx *rx,
+-                                  struct mlx5_flow_destination *dest);
+ void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
+                                      struct mlx5e_ipsec_rx_create_attr *attr);
+ int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec,
+@@ -26,16 +21,6 @@ void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
+                                      struct mlx5e_ipsec_tx_create_attr *attr);
+ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev);
+ #else
+-static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
+-                                                  struct mlx5e_ipsec_rx *rx) {}
+-
+-static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
+-                                                struct mlx5e_ipsec_rx *rx,
+-                                                struct mlx5_flow_destination *dest)
+-{
+-      return  -EINVAL;
+-}
+-
+ static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
+                                                    struct mlx5e_ipsec_rx_create_attr *attr) {}
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch b/queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch
new file mode 100644 (file)
index 0000000..20cb1aa
--- /dev/null
@@ -0,0 +1,111 @@
+From f3f5b17e5fc1ea851d3abb63dc7b50dcfd548561 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Dec 2023 10:02:00 +0800
+Subject: net: Remove acked SYN flag from packet in the transmit queue
+ correctly
+
+From: Dong Chenchen <dongchenchen2@huawei.com>
+
+[ Upstream commit f99cd56230f56c8b6b33713c5be4da5d6766be1f ]
+
+syzkaller report:
+
+ kernel BUG at net/core/skbuff.c:3452!
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
+ CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.7.0-rc4-00009-gbee0e7762ad2-dirty #135
+ RIP: 0010:skb_copy_and_csum_bits (net/core/skbuff.c:3452)
+ Call Trace:
+ icmp_glue_bits (net/ipv4/icmp.c:357)
+ __ip_append_data.isra.0 (net/ipv4/ip_output.c:1165)
+ ip_append_data (net/ipv4/ip_output.c:1362 net/ipv4/ip_output.c:1341)
+ icmp_push_reply (net/ipv4/icmp.c:370)
+ __icmp_send (./include/net/route.h:252 net/ipv4/icmp.c:772)
+ ip_fragment.constprop.0 (./include/linux/skbuff.h:1234 net/ipv4/ip_output.c:592 net/ipv4/ip_output.c:577)
+ __ip_finish_output (net/ipv4/ip_output.c:311 net/ipv4/ip_output.c:295)
+ ip_output (net/ipv4/ip_output.c:427)
+ __ip_queue_xmit (net/ipv4/ip_output.c:535)
+ __tcp_transmit_skb (net/ipv4/tcp_output.c:1462)
+ __tcp_retransmit_skb (net/ipv4/tcp_output.c:3387)
+ tcp_retransmit_skb (net/ipv4/tcp_output.c:3404)
+ tcp_retransmit_timer (net/ipv4/tcp_timer.c:604)
+ tcp_write_timer (./include/linux/spinlock.h:391 net/ipv4/tcp_timer.c:716)
+
+The panic issue was trigered by tcp simultaneous initiation.
+The initiation process is as follows:
+
+      TCP A                                            TCP B
+
+  1.  CLOSED                                           CLOSED
+
+  2.  SYN-SENT     --> <SEQ=100><CTL=SYN>              ...
+
+  3.  SYN-RECEIVED <-- <SEQ=300><CTL=SYN>              <-- SYN-SENT
+
+  4.               ... <SEQ=100><CTL=SYN>              --> SYN-RECEIVED
+
+  5.  SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ...
+
+  // TCP B: not send challenge ack for ack limit or packet loss
+  // TCP A: close
+       tcp_close
+          tcp_send_fin
+              if (!tskb && tcp_under_memory_pressure(sk))
+                  tskb = skb_rb_last(&sk->tcp_rtx_queue); //pick SYN_ACK packet
+           TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;  // set FIN flag
+
+  6.  FIN_WAIT_1  --> <SEQ=100><ACK=301><END_SEQ=102><CTL=SYN,FIN,ACK> ...
+
+  // TCP B: send challenge ack to SYN_FIN_ACK
+
+  7.               ... <SEQ=301><ACK=101><CTL=ACK>   <-- SYN-RECEIVED //challenge ack
+
+  // TCP A:  <SND.UNA=101>
+
+  8.  FIN_WAIT_1 --> <SEQ=101><ACK=301><END_SEQ=102><CTL=SYN,FIN,ACK> ... // retransmit panic
+
+       __tcp_retransmit_skb  //skb->len=0
+           tcp_trim_head
+               len = tp->snd_una - TCP_SKB_CB(skb)->seq // len=101-100
+                   __pskb_trim_head
+                       skb->data_len -= len // skb->len=-1, wrap around
+           ... ...
+           ip_fragment
+               icmp_glue_bits //BUG_ON
+
+If we use tcp_trim_head() to remove acked SYN from packet that contains data
+or other flags, skb->len will be incorrectly decremented. We can remove SYN
+flag that has been acked from rtx_queue earlier than tcp_trim_head(), which
+can fix the problem mentioned above.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Co-developed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com>
+Link: https://lore.kernel.org/r/20231210020200.1539875-1-dongchenchen2@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 9ccfdc825004d..cab3c1162c3a6 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3263,7 +3263,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
+       if (skb_still_in_host_queue(sk, skb))
+               return -EBUSY;
++start:
+       if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
++              if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
++                      TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
++                      TCP_SKB_CB(skb)->seq++;
++                      goto start;
++              }
+               if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
+                       WARN_ON_ONCE(1);
+                       return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch b/queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch
new file mode 100644 (file)
index 0000000..008ec37
--- /dev/null
@@ -0,0 +1,48 @@
+From dc1d64471279235b3dffed40de35ec35b2a5ecb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Dec 2023 05:05:38 -0500
+Subject: net/rose: Fix Use-After-Free in rose_ioctl
+
+From: Hyunwoo Kim <v4bel@theori.io>
+
+[ Upstream commit 810c38a369a0a0ce625b5c12169abce1dd9ccd53 ]
+
+Because rose_ioctl() accesses sk->sk_receive_queue
+without holding a sk->sk_receive_queue.lock, it can
+cause a race with rose_accept().
+A use-after-free for skb occurs with the following flow.
+```
+rose_ioctl() -> skb_peek()
+rose_accept() -> skb_dequeue() -> kfree_skb()
+```
+Add sk->sk_receive_queue.lock to rose_ioctl() to fix this issue.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Hyunwoo Kim <v4bel@theori.io>
+Link: https://lore.kernel.org/r/20231209100538.GA407321@v4bel-B760M-AORUS-ELITE-AX
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rose/af_rose.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
+index 49dafe9ac72f0..4a5c2dc8dd7a9 100644
+--- a/net/rose/af_rose.c
++++ b/net/rose/af_rose.c
+@@ -1315,9 +1315,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+       case TIOCINQ: {
+               struct sk_buff *skb;
+               long amount = 0L;
+-              /* These two are safe on a single CPU system as only user tasks fiddle here */
++
++              spin_lock_irq(&sk->sk_receive_queue.lock);
+               if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
+                       amount = skb->len;
++              spin_unlock_irq(&sk->sk_receive_queue.lock);
+               return put_user(amount, (unsigned int __user *) argp);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch b/queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch
new file mode 100644 (file)
index 0000000..c5de32b
--- /dev/null
@@ -0,0 +1,195 @@
+From f52927f6cdb3fd65812d55e619bc4c53eb46aba2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Dec 2023 18:25:54 +0100
+Subject: net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc ]
+
+The referenced change added custom cleanup code to act_ct to delete any
+callbacks registered on the parent block when deleting the
+tcf_ct_flow_table instance. However, the underlying issue is that the
+drivers don't obtain the reference to the tcf_ct_flow_table instance when
+registering callbacks which means that not only driver callbacks may still
+be on the table when deleting it but also that the driver can still have
+pointers to its internal nf_flowtable and can use it concurrently which
+results either warning in netfilter[0] or use-after-free.
+
+Fix the issue by taking a reference to the underlying struct
+tcf_ct_flow_table instance when registering the callback and release the
+reference when unregistering. Expose new API required for such reference
+counting by adding two new callbacks to nf_flowtable_type and implementing
+them for act_ct flowtable_ct type. This fixes the issue by extending the
+lifetime of nf_flowtable until all users have unregistered.
+
+[0]:
+[106170.938634] ------------[ cut here ]------------
+[106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm
+ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis
+try overlay mlx5_core
+[106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1
+[106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
+[106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00
+[106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202
+[106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48
+[106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000
+[106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700
+[106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58
+[106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000
+[106170.951616] FS:  0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000
+[106170.952329] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0
+[106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[106170.954766] Call Trace:
+[106170.955057]  <TASK>
+[106170.955315]  ? __warn+0x79/0x120
+[106170.955648]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.956172]  ? report_bug+0x17c/0x190
+[106170.956537]  ? handle_bug+0x3c/0x60
+[106170.956891]  ? exc_invalid_op+0x14/0x70
+[106170.957264]  ? asm_exc_invalid_op+0x16/0x20
+[106170.957666]  ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core]
+[106170.958172]  ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core]
+[106170.958788]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
+[106170.959339]  ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core]
+[106170.959854]  ? mapping_remove+0x154/0x1d0 [mlx5_core]
+[106170.960342]  ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core]
+[106170.960927]  mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core]
+[106170.961441]  mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core]
+[106170.962001]  mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core]
+[106170.962524]  mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core]
+[106170.963034]  mlx5e_flow_put+0x73/0xe0 [mlx5_core]
+[106170.963506]  mlx5e_put_flow_list+0x38/0x70 [mlx5_core]
+[106170.964002]  mlx5e_rep_update_flows+0xec/0x290 [mlx5_core]
+[106170.964525]  mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core]
+[106170.965056]  process_one_work+0x13a/0x2c0
+[106170.965443]  worker_thread+0x2e5/0x3f0
+[106170.965808]  ? rescuer_thread+0x410/0x410
+[106170.966192]  kthread+0xc6/0xf0
+[106170.966515]  ? kthread_complete_and_exit+0x20/0x20
+[106170.966970]  ret_from_fork+0x2d/0x50
+[106170.967332]  ? kthread_complete_and_exit+0x20/0x20
+[106170.967774]  ret_from_fork_asm+0x11/0x20
+[106170.970466]  </TASK>
+[106170.970726] ---[ end trace 0000000000000000 ]---
+
+Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_flow_table.h | 10 ++++++++
+ net/sched/act_ct.c                    | 34 ++++++++++++++++++++++-----
+ 2 files changed, 38 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index fe1507c1db828..692d5955911c7 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -62,6 +62,8 @@ struct nf_flowtable_type {
+                                                 enum flow_offload_tuple_dir dir,
+                                                 struct nf_flow_rule *flow_rule);
+       void                            (*free)(struct nf_flowtable *ft);
++      void                            (*get)(struct nf_flowtable *ft);
++      void                            (*put)(struct nf_flowtable *ft);
+       nf_hookfn                       *hook;
+       struct module                   *owner;
+ };
+@@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
+       }
+       list_add_tail(&block_cb->list, &block->cb_list);
++      up_write(&flow_table->flow_block_lock);
++
++      if (flow_table->type->get)
++              flow_table->type->get(flow_table);
++      return 0;
+ unlock:
+       up_write(&flow_table->flow_block_lock);
+@@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
+               WARN_ON(true);
+       }
+       up_write(&flow_table->flow_block_lock);
++
++      if (flow_table->type->put)
++              flow_table->type->put(flow_table);
+ }
+ void flow_offload_route_init(struct flow_offload *flow,
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index 6dcc4585576e8..dd710fb9f4905 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
+              !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+ }
++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
++
++static void tcf_ct_nf_get(struct nf_flowtable *ft)
++{
++      struct tcf_ct_flow_table *ct_ft =
++              container_of(ft, struct tcf_ct_flow_table, nf_ft);
++
++      tcf_ct_flow_table_get_ref(ct_ft);
++}
++
++static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
++
++static void tcf_ct_nf_put(struct nf_flowtable *ft)
++{
++      struct tcf_ct_flow_table *ct_ft =
++              container_of(ft, struct tcf_ct_flow_table, nf_ft);
++
++      tcf_ct_flow_table_put(ct_ft);
++}
++
+ static struct nf_flowtable_type flowtable_ct = {
+       .gc             = tcf_ct_flow_is_outdated,
+       .action         = tcf_ct_flow_table_fill_actions,
++      .get            = tcf_ct_nf_get,
++      .put            = tcf_ct_nf_put,
+       .owner          = THIS_MODULE,
+ };
+@@ -337,9 +359,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
+       return err;
+ }
++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
++{
++      refcount_inc(&ct_ft->ref);
++}
++
+ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+ {
+-      struct flow_block_cb *block_cb, *tmp_cb;
+       struct tcf_ct_flow_table *ct_ft;
+       struct flow_block *block;
+@@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+                            rwork);
+       nf_flow_table_free(&ct_ft->nf_ft);
+-      /* Remove any remaining callbacks before cleanup */
+       block = &ct_ft->nf_ft.flow_block;
+       down_write(&ct_ft->nf_ft.flow_block_lock);
+-      list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
+-              list_del(&block_cb->list);
+-              flow_block_cb_free(block_cb);
+-      }
++      WARN_ON(!list_empty(&block->cb_list));
+       up_write(&ct_ft->nf_ft.flow_block_lock);
+       kfree(ct_ft);
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch b/queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch
new file mode 100644 (file)
index 0000000..841e395
--- /dev/null
@@ -0,0 +1,69 @@
+From 06aba8a79aed14f486bf2bb12c9983603d206373 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 14:52:08 +0530
+Subject: net: stmmac: dwmac-qcom-ethqos: Fix drops in 10M SGMII RX
+
+From: Sneh Shah <quic_snehshah@quicinc.com>
+
+[ Upstream commit 981d947bcd382c3950a593690e0e13d194d65b1c ]
+
+In 10M SGMII mode all the packets are being dropped due to wrong Rx clock.
+SGMII 10MBPS mode needs RX clock divider programmed to avoid drops in Rx.
+Update configure SGMII function with Rx clk divider programming.
+
+Fixes: 463120c31c58 ("net: stmmac: dwmac-qcom-ethqos: add support for SGMII")
+Tested-by: Andrew Halaney <ahalaney@redhat.com>
+Signed-off-by: Sneh Shah <quic_snehshah@quicinc.com>
+Reviewed-by: Bjorn Andersson <quic_bjorande@quicinc.com>
+Link: https://lore.kernel.org/r/20231212092208.22393-1-quic_snehshah@quicinc.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c    | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+index d3bf42d0fceb6..31631e3f89d0a 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+@@ -34,6 +34,7 @@
+ #define RGMII_CONFIG_LOOPBACK_EN              BIT(2)
+ #define RGMII_CONFIG_PROG_SWAP                        BIT(1)
+ #define RGMII_CONFIG_DDR_MODE                 BIT(0)
++#define RGMII_CONFIG_SGMII_CLK_DVDR           GENMASK(18, 10)
+ /* SDCC_HC_REG_DLL_CONFIG fields */
+ #define SDCC_DLL_CONFIG_DLL_RST                       BIT(30)
+@@ -78,6 +79,8 @@
+ #define ETHQOS_MAC_CTRL_SPEED_MODE            BIT(14)
+ #define ETHQOS_MAC_CTRL_PORT_SEL              BIT(15)
++#define SGMII_10M_RX_CLK_DVDR                 0x31
++
+ struct ethqos_emac_por {
+       unsigned int offset;
+       unsigned int value;
+@@ -598,6 +601,9 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
+       return 0;
+ }
++/* On interface toggle MAC registers gets reset.
++ * Configure MAC block for SGMII on ethernet phy link up
++ */
+ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
+ {
+       int val;
+@@ -617,6 +623,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
+       case SPEED_10:
+               val |= ETHQOS_MAC_CTRL_PORT_SEL;
+               val &= ~ETHQOS_MAC_CTRL_SPEED_MODE;
++              rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR,
++                            FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR,
++                                       SGMII_10M_RX_CLK_DVDR),
++                            RGMII_IO_MACRO_CONFIG);
+               break;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch b/queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch
new file mode 100644 (file)
index 0000000..42102f6
--- /dev/null
@@ -0,0 +1,45 @@
+From 6edd84c6c95981d99e9951fb755012e5584288d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Dec 2023 16:18:33 -0600
+Subject: net: stmmac: Handle disabled MDIO busses from devicetree
+
+From: Andrew Halaney <ahalaney@redhat.com>
+
+[ Upstream commit e23c0d21ce9234fbc31ece35663ababbb83f9347 ]
+
+Many hardware configurations have the MDIO bus disabled, and are instead
+using some other MDIO bus to talk to the MAC's phy.
+
+of_mdiobus_register() returns -ENODEV in this case. Let's handle it
+gracefully instead of failing to probe the MAC.
+
+Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.")
+Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
+Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
+Link: https://lore.kernel.org/r/20231212-b4-stmmac-handle-mdio-enodev-v2-1-600171acf79f@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+index fa9e7e7040b94..0542cfd1817e6 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+@@ -591,7 +591,11 @@ int stmmac_mdio_register(struct net_device *ndev)
+       new_bus->parent = priv->device;
+       err = of_mdiobus_register(new_bus, mdio_node);
+-      if (err != 0) {
++      if (err == -ENODEV) {
++              err = 0;
++              dev_info(dev, "MDIO bus is disabled\n");
++              goto bus_register_fail;
++      } else if (err) {
+               dev_err_probe(dev, err, "Cannot register the MDIO bus\n");
+               goto bus_register_fail;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch b/queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch
new file mode 100644 (file)
index 0000000..9bdf2da
--- /dev/null
@@ -0,0 +1,41 @@
+From 1457b6f886ac66a5f6e601d50aaeb9bd14b64387 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 21:56:46 -0800
+Subject: octeon_ep: explicitly test for firmware ready value
+
+From: Shinas Rasheed <srasheed@marvell.com>
+
+[ Upstream commit 284f717622417cb267e344a9174f8e5698d1e3c1 ]
+
+The firmware ready value is 1, and get firmware ready status
+function should explicitly test for that value. The firmware
+ready value read will be 2 after driver load, and on unbind
+till firmware rewrites the firmware ready back to 0, the value
+seen by driver will be 2, which should be regarded as not ready.
+
+Fixes: 10c073e40469 ("octeon_ep: defer probe if firmware not ready")
+Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+index 5b46ca47c8e59..2ee1374db4c06 100644
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+@@ -1076,7 +1076,8 @@ static bool get_fw_ready_status(struct pci_dev *pdev)
+               pci_read_config_byte(pdev, (pos + 8), &status);
+               dev_info(&pdev->dev, "Firmware ready status = %u\n", status);
+-              return status;
++#define FW_STATUS_READY 1ULL
++              return status == FW_STATUS_READY;
+       }
+       return false;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch b/queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch
new file mode 100644 (file)
index 0000000..7dce9b5
--- /dev/null
@@ -0,0 +1,63 @@
+From 501566ee9ce3eab124281068846257dc464950b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Dec 2023 17:49:16 +0800
+Subject: octeontx2-af: fix a use-after-free in rvu_nix_register_reporters
+
+From: Zhipeng Lu <alexious@zju.edu.cn>
+
+[ Upstream commit 28a7cb045ab700de5554193a1642917602787784 ]
+
+The rvu_dl will be freed in rvu_nix_health_reporters_destroy(rvu_dl)
+after the create_workqueue fails, and after that free, the rvu_dl will
+be translate back through the following call chain:
+
+rvu_nix_health_reporters_destroy
+  |-> rvu_nix_health_reporters_create
+       |-> rvu_health_reporters_create
+             |-> rvu_register_dl (label err_dl_health)
+
+Finally, in the err_dl_health label, rvu_dl is freed again in
+rvu_health_reporters_destroy(rvu) by rvu_nix_health_reporters_destroy.
+In the second call of rvu_nix_health_reporters_destroy, however,
+it uses rvu_dl->rvu_nix_health_reporter, which is already freed at
+the end of rvu_nix_health_reporters_destroy in the first call.
+
+So this patch prevents the first destroy by instantly returning -ENOMEM
+when create_workqueue fails. In addition, since the failure of
+create_workqueue is the only entrance to label err, it has been
+integrated into the error-handling path of create_workqueue.
+
+Fixes: 5ed66306eab6 ("octeontx2-af: Add devlink health reporters for NIX")
+Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+index 058f75dc4c8a5..bffe04e6d0254 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+@@ -642,7 +642,7 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
+       rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq");
+       if (!rvu_dl->devlink_wq)
+-              goto err;
++              return -ENOMEM;
+       INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work);
+       INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work);
+@@ -650,9 +650,6 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
+       INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work);
+       return 0;
+-err:
+-      rvu_nix_health_reporters_destroy(rvu_dl);
+-      return -ENOMEM;
+ }
+ static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl)
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch b/queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch
new file mode 100644 (file)
index 0000000..0a92ac6
--- /dev/null
@@ -0,0 +1,56 @@
+From a59de66c19f8ea84e21207d5524c28ecb6976545 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Dec 2023 14:57:54 +0530
+Subject: octeontx2-af: Fix pause frame configuration
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit e307b5a845c5951dabafc48d00b6424ee64716c4 ]
+
+The current implementation's default Pause Forward setting is causing
+unnecessary network traffic. This patch disables Pause Forward to
+address this issue.
+
+Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+index af21e2030cff2..4728ba34b0e34 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+@@ -373,6 +373,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
+       cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
+       rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
++      /* Disable forward pause to driver */
++      cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
++      cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD;
++      rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
++
+       /* Enable channel mask for all LMACS */
+       if (is_dev_rpm2(rpm))
+               rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff);
+@@ -616,12 +621,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p
+       if (rx_pause) {
+               cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
++                       RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE);
+       } else {
+               cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE |
+-                              RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD);
++                      RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE);
+       }
+       if (tx_pause) {
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-af-update-rss-algorithm-index.patch b/queue-6.6/octeontx2-af-update-rss-algorithm-index.patch
new file mode 100644 (file)
index 0000000..0b556cb
--- /dev/null
@@ -0,0 +1,147 @@
+From c10d3bce4c637e39d51cb51c14a73ace77b2fd4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Dec 2023 12:26:10 +0530
+Subject: octeontx2-af: Update RSS algorithm index
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit 570ba37898ecd9069beb58bf0b6cf84daba6e0fe ]
+
+The RSS flow algorithm is not set up correctly for promiscuous or all
+multi MCAM entries. This has an impact on flow distribution.
+
+This patch fixes the issue by updating flow algorithm index in above
+mentioned MCAM entries.
+
+Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 55 +++++++++++++++----
+ 1 file changed, 44 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index f65805860c8d4..0bcf3e5592806 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -671,6 +671,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+       int blkaddr, ucast_idx, index;
+       struct nix_rx_action action = { 0 };
+       u64 relaxed_mask;
++      u8 flow_key_alg;
+       if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc))
+               return;
+@@ -701,6 +702,8 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+               action.op = NIX_RX_ACTIONOP_UCAST;
+       }
++      flow_key_alg = action.flow_key_alg;
++
+       /* RX_ACTION set to MCAST for CGX PF's */
+       if (hw->cap.nix_rx_multicast && pfvf->use_mce_list &&
+           is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
+@@ -740,7 +743,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
+       req.vf = pcifunc;
+       req.index = action.index;
+       req.match_id = action.match_id;
+-      req.flow_key_alg = action.flow_key_alg;
++      req.flow_key_alg = flow_key_alg;
+       rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
+ }
+@@ -854,6 +857,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+       u8 mac_addr[ETH_ALEN] = { 0 };
+       struct nix_rx_action action = { 0 };
+       struct rvu_pfvf *pfvf;
++      u8 flow_key_alg;
+       u16 vf_func;
+       /* Only CGX PF/VF can add allmulticast entry */
+@@ -888,6 +892,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+               *(u64 *)&action = npc_get_mcam_action(rvu, mcam,
+                                                       blkaddr, ucast_idx);
++      flow_key_alg = action.flow_key_alg;
+       if (action.op != NIX_RX_ACTIONOP_RSS) {
+               *(u64 *)&action = 0;
+               action.op = NIX_RX_ACTIONOP_UCAST;
+@@ -924,7 +929,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+       req.vf = pcifunc | vf_func;
+       req.index = action.index;
+       req.match_id = action.match_id;
+-      req.flow_key_alg = action.flow_key_alg;
++      req.flow_key_alg = flow_key_alg;
+       rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
+ }
+@@ -990,11 +995,38 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+       mutex_unlock(&mcam->lock);
+ }
++static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_action action,
++                                            struct rvu_pfvf *pfvf, int mcam_index, int blkaddr,
++                                            int alg_idx)
++
++{
++      struct npc_mcam *mcam = &rvu->hw->mcam;
++      struct rvu_hwinfo *hw = rvu->hw;
++      int bank, op_rss;
++
++      if (!is_mcam_entry_enabled(rvu, mcam, blkaddr, mcam_index))
++              return;
++
++      op_rss = (!hw->cap.nix_rx_multicast || !pfvf->use_mce_list);
++
++      bank = npc_get_bank(mcam, mcam_index);
++      mcam_index &= (mcam->banksize - 1);
++
++      /* If Rx action is MCAST update only RSS algorithm index */
++      if (!op_rss) {
++              *(u64 *)&action = rvu_read64(rvu, blkaddr,
++                              NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank));
++
++              action.flow_key_alg = alg_idx;
++      }
++      rvu_write64(rvu, blkaddr,
++                  NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank), *(u64 *)&action);
++}
++
+ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
+                                   int group, int alg_idx, int mcam_index)
+ {
+       struct npc_mcam *mcam = &rvu->hw->mcam;
+-      struct rvu_hwinfo *hw = rvu->hw;
+       struct nix_rx_action action;
+       int blkaddr, index, bank;
+       struct rvu_pfvf *pfvf;
+@@ -1050,15 +1082,16 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
+       /* If PF's promiscuous entry is enabled,
+        * Set RSS action for that entry as well
+        */
+-      if ((!hw->cap.nix_rx_multicast || !pfvf->use_mce_list) &&
+-          is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) {
+-              bank = npc_get_bank(mcam, index);
+-              index &= (mcam->banksize - 1);
++      npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr,
++                                        alg_idx);
+-              rvu_write64(rvu, blkaddr,
+-                          NPC_AF_MCAMEX_BANKX_ACTION(index, bank),
+-                          *(u64 *)&action);
+-      }
++      index = npc_get_nixlf_mcam_index(mcam, pcifunc,
++                                       nixlf, NIXLF_ALLMULTI_ENTRY);
++      /* If PF's allmulti  entry is enabled,
++       * Set RSS action for that entry as well
++       */
++      npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr,
++                                        alg_idx);
+ }
+ void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc,
+-- 
+2.43.0
+
diff --git a/queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch b/queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch
new file mode 100644 (file)
index 0000000..89e2daa
--- /dev/null
@@ -0,0 +1,83 @@
+From 8953c7f988432e23b003197992009e0c8542f4d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Dec 2023 12:26:09 +0530
+Subject: octeontx2-pf: Fix promisc mcam entry action
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit dbda436824ded8ef6a05bb82cd9baa8d42377a49 ]
+
+Current implementation is such that, promisc mcam entry action
+is set as multicast even when there are no trusted VFs. multicast
+action causes the hardware to copy packet data, which reduces
+the performance.
+
+This patch fixes this issue by setting the promisc mcam entry action to
+unicast instead of multicast when there are no trusted VFs. The same
+change is made for the 'allmulti' mcam entry action.
+
+Fixes: ffd2f89ad05c ("octeontx2-pf: Enable promisc/allmulti match MCAM entries.")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_pf.c  | 25 ++++++++++++++++---
+ 1 file changed, 22 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index 0c17ebdda1487..a57455aebff6f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1650,6 +1650,21 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
+       mutex_unlock(&mbox->lock);
+ }
++static bool otx2_promisc_use_mce_list(struct otx2_nic *pfvf)
++{
++      int vf;
++
++      /* The AF driver will determine whether to allow the VF netdev or not */
++      if (is_otx2_vf(pfvf->pcifunc))
++              return true;
++
++      /* check if there are any trusted VFs associated with the PF netdev */
++      for (vf = 0; vf < pci_num_vf(pfvf->pdev); vf++)
++              if (pfvf->vf_configs[vf].trusted)
++                      return true;
++      return false;
++}
++
+ static void otx2_do_set_rx_mode(struct otx2_nic *pf)
+ {
+       struct net_device *netdev = pf->netdev;
+@@ -1682,7 +1697,8 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf)
+       if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
+               req->mode |= NIX_RX_MODE_ALLMULTI;
+-      req->mode |= NIX_RX_MODE_USE_MCE;
++      if (otx2_promisc_use_mce_list(pf))
++              req->mode |= NIX_RX_MODE_USE_MCE;
+       otx2_sync_mbox_msg(&pf->mbox);
+       mutex_unlock(&pf->mbox.lock);
+@@ -2691,11 +2707,14 @@ static int otx2_ndo_set_vf_trust(struct net_device *netdev, int vf,
+       pf->vf_configs[vf].trusted = enable;
+       rc = otx2_set_vf_permissions(pf, vf, OTX2_TRUSTED_VF);
+-      if (rc)
++      if (rc) {
+               pf->vf_configs[vf].trusted = !enable;
+-      else
++      } else {
+               netdev_info(pf->netdev, "VF %d is %strusted\n",
+                           vf, enable ? "" : "not ");
++              otx2_set_rx_mode(netdev);
++      }
++
+       return rc;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch b/queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch
new file mode 100644 (file)
index 0000000..b70f7ff
--- /dev/null
@@ -0,0 +1,80 @@
+From 88f9f0ef15f2ff257c658563f4b877447c75c38e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Dec 2023 15:12:21 +0100
+Subject: qca_debug: Fix ethtool -G iface tx behavior
+
+From: Stefan Wahren <wahrenst@gmx.net>
+
+[ Upstream commit 96a7e861d9e04d07febd3011c30cd84cd141d81f ]
+
+After calling ethtool -g it was not possible to adjust the TX ring
+size again:
+
+  # ethtool -g eth1
+  Ring parameters for eth1:
+  Pre-set maximums:
+  RX:          4
+  RX Mini:     n/a
+  RX Jumbo:    n/a
+  TX:          10
+  Current hardware settings:
+  RX:          4
+  RX Mini:     n/a
+  RX Jumbo:    n/a
+  TX:          10
+  # ethtool -G eth1 tx 8
+  netlink error: Invalid argument
+
+The reason for this is that the readonly setting rx_pending gets
+initialized and after that the range check in qcaspi_set_ringparam()
+fails regardless of the provided parameter. So fix this by accepting
+the exposed RX defaults. Instead of adding another magic number,
+better to use a new define here.
+
+Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
+Link: https://lore.kernel.org/r/20231206141222.52029-3-wahrenst@gmx.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qualcomm/qca_debug.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
+index a5445252b0c4d..1822f2ad8f0dd 100644
+--- a/drivers/net/ethernet/qualcomm/qca_debug.c
++++ b/drivers/net/ethernet/qualcomm/qca_debug.c
+@@ -30,6 +30,8 @@
+ #define QCASPI_MAX_REGS 0x20
++#define QCASPI_RX_MAX_FRAMES 4
++
+ static const u16 qcaspi_spi_regs[] = {
+       SPI_REG_BFR_SIZE,
+       SPI_REG_WRBUF_SPC_AVA,
+@@ -252,9 +254,9 @@ qcaspi_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
+ {
+       struct qcaspi *qca = netdev_priv(dev);
+-      ring->rx_max_pending = 4;
++      ring->rx_max_pending = QCASPI_RX_MAX_FRAMES;
+       ring->tx_max_pending = TX_RING_MAX_LEN;
+-      ring->rx_pending = 4;
++      ring->rx_pending = QCASPI_RX_MAX_FRAMES;
+       ring->tx_pending = qca->txr.count;
+ }
+@@ -265,7 +267,7 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
+ {
+       struct qcaspi *qca = netdev_priv(dev);
+-      if ((ring->rx_pending) ||
++      if (ring->rx_pending != QCASPI_RX_MAX_FRAMES ||
+           (ring->rx_mini_pending) ||
+           (ring->rx_jumbo_pending))
+               return -EINVAL;
+-- 
+2.43.0
+
diff --git a/queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch b/queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch
new file mode 100644 (file)
index 0000000..f4af99e
--- /dev/null
@@ -0,0 +1,86 @@
+From 92e0ef9f0b2a85c10a5af05de91c543d4b4e4592 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Dec 2023 15:12:20 +0100
+Subject: qca_debug: Prevent crash on TX ring changes
+
+From: Stefan Wahren <wahrenst@gmx.net>
+
+[ Upstream commit f4e6064c97c050bd9904925ff7d53d0c9954fc7b ]
+
+The qca_spi driver stops and restarts the SPI kernel thread
+(via ndo_stop & ndo_open) in case of TX ring changes. This is
+a big issue because it allows userspace to prevent restart of
+the SPI kernel thread (via signals). A subsequent change of the
+TX ring wrongly assumes a valid spi_thread pointer, which results
+in a crash.
+
+So prevent this by stopping the network traffic handling and
+temporary park the SPI thread.
+
+Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
+Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
+Link: https://lore.kernel.org/r/20231206141222.52029-2-wahrenst@gmx.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qualcomm/qca_debug.c |  9 ++++-----
+ drivers/net/ethernet/qualcomm/qca_spi.c   | 12 ++++++++++++
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
+index 6f2fa2a42770a..a5445252b0c4d 100644
+--- a/drivers/net/ethernet/qualcomm/qca_debug.c
++++ b/drivers/net/ethernet/qualcomm/qca_debug.c
+@@ -263,7 +263,6 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
+                    struct kernel_ethtool_ringparam *kernel_ring,
+                    struct netlink_ext_ack *extack)
+ {
+-      const struct net_device_ops *ops = dev->netdev_ops;
+       struct qcaspi *qca = netdev_priv(dev);
+       if ((ring->rx_pending) ||
+@@ -271,14 +270,14 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring,
+           (ring->rx_jumbo_pending))
+               return -EINVAL;
+-      if (netif_running(dev))
+-              ops->ndo_stop(dev);
++      if (qca->spi_thread)
++              kthread_park(qca->spi_thread);
+       qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN);
+       qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN);
+-      if (netif_running(dev))
+-              ops->ndo_open(dev);
++      if (qca->spi_thread)
++              kthread_unpark(qca->spi_thread);
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
+index bec723028e96c..b0fad69bb755f 100644
+--- a/drivers/net/ethernet/qualcomm/qca_spi.c
++++ b/drivers/net/ethernet/qualcomm/qca_spi.c
+@@ -580,6 +580,18 @@ qcaspi_spi_thread(void *data)
+       netdev_info(qca->net_dev, "SPI thread created\n");
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
++              if (kthread_should_park()) {
++                      netif_tx_disable(qca->net_dev);
++                      netif_carrier_off(qca->net_dev);
++                      qcaspi_flush_tx_ring(qca);
++                      kthread_parkme();
++                      if (qca->sync == QCASPI_SYNC_READY) {
++                              netif_carrier_on(qca->net_dev);
++                              netif_wake_queue(qca->net_dev);
++                      }
++                      continue;
++              }
++
+               if ((qca->intr_req == qca->intr_svc) &&
+                   !qca->txr.skb[qca->txr.head])
+                       schedule();
+-- 
+2.43.0
+
diff --git a/queue-6.6/qca_spi-fix-reset-behavior.patch b/queue-6.6/qca_spi-fix-reset-behavior.patch
new file mode 100644 (file)
index 0000000..f204877
--- /dev/null
@@ -0,0 +1,51 @@
+From e84d2f0f90c9eee06d0ff9f599411b46b7189521 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Dec 2023 15:12:22 +0100
+Subject: qca_spi: Fix reset behavior
+
+From: Stefan Wahren <wahrenst@gmx.net>
+
+[ Upstream commit 1057812d146dd658c9a9a96d869c2551150207b5 ]
+
+In case of a reset triggered by the QCA7000 itself, the behavior of the
+qca_spi driver was not quite correct:
+- in case of a pending RX frame decoding, the drop counter must be
+  incremented and the decoding state machine reset
+- also the reset counter must always be incremented regardless of sync
+  state
+
+Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
+Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
+Link: https://lore.kernel.org/r/20231206141222.52029-4-wahrenst@gmx.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qualcomm/qca_spi.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
+index b0fad69bb755f..5f3c11fb3fa27 100644
+--- a/drivers/net/ethernet/qualcomm/qca_spi.c
++++ b/drivers/net/ethernet/qualcomm/qca_spi.c
+@@ -620,11 +620,17 @@ qcaspi_spi_thread(void *data)
+                       if (intr_cause & SPI_INT_CPU_ON) {
+                               qcaspi_qca7k_sync(qca, QCASPI_EVENT_CPUON);
++                              /* Frame decoding in progress */
++                              if (qca->frm_handle.state != qca->frm_handle.init)
++                                      qca->net_dev->stats.rx_dropped++;
++
++                              qcafrm_fsm_init_spi(&qca->frm_handle);
++                              qca->stats.device_reset++;
++
+                               /* not synced. */
+                               if (qca->sync != QCASPI_SYNC_READY)
+                                       continue;
+-                              qca->stats.device_reset++;
+                               netif_wake_queue(qca->net_dev);
+                               netif_carrier_on(qca->net_dev);
+                       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch b/queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch
new file mode 100644 (file)
index 0000000..a4ae1b4
--- /dev/null
@@ -0,0 +1,41 @@
+From 7380016b6103ad007edc7f1190392f85e7c43a6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Dec 2023 12:52:55 +0800
+Subject: qed: Fix a potential use-after-free in qed_cxt_tables_alloc
+
+From: Dinghao Liu <dinghao.liu@zju.edu.cn>
+
+[ Upstream commit b65d52ac9c085c0c52dee012a210d4e2f352611b ]
+
+qed_ilt_shadow_alloc() will call qed_ilt_shadow_free() to
+free p_hwfn->p_cxt_mngr->ilt_shadow on error. However,
+qed_cxt_tables_alloc() accesses the freed pointer on failure
+of qed_ilt_shadow_alloc() through calling qed_cxt_mngr_free(),
+which may lead to use-after-free. Fix this issue by setting
+p_mngr->ilt_shadow to NULL in qed_ilt_shadow_free().
+
+Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support")
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
+Link: https://lore.kernel.org/r/20231210045255.21383-1-dinghao.liu@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+index 65e20693c549e..33f4f58ee51c6 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+@@ -933,6 +933,7 @@ static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn)
+               p_dma->virt_addr = NULL;
+       }
+       kfree(p_mngr->ilt_shadow);
++      p_mngr->ilt_shadow = NULL;
+ }
+ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
+-- 
+2.43.0
+
diff --git a/queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch b/queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch
new file mode 100644 (file)
index 0000000..f835dff
--- /dev/null
@@ -0,0 +1,123 @@
+From 25d4896facd82be6ab7458ba05cf40d0fbc4721d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 15:10:27 +0300
+Subject: RDMA/mlx5: Send events from IB driver about device affiliation state
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit 0d293714ac32650bfb669ceadf7cc2fad8161401 ]
+
+Send blocking events from IB driver whenever the device is done being
+affiliated or if it is removed from an affiliation.
+
+This is useful since now the EN driver can register to those event and
+know when a device is affiliated or not.
+
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Link: https://lore.kernel.org/r/a7491c3e483cfd8d962f5f75b9a25f253043384a.1695296682.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Stable-dep-of: 762a55a54eec ("net/mlx5e: Disable IPsec offload support if not FW steering")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/main.c              | 17 +++++++++++++++++
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |  6 ++++++
+ include/linux/mlx5/device.h                    |  2 ++
+ include/linux/mlx5/driver.h                    |  2 ++
+ 4 files changed, 27 insertions(+)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index 5d963abb7e609..4c4233b9c8b08 100644
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -24,6 +24,7 @@
+ #include <linux/mlx5/vport.h>
+ #include <linux/mlx5/fs.h>
+ #include <linux/mlx5/eswitch.h>
++#include <linux/mlx5/driver.h>
+ #include <linux/list.h>
+ #include <rdma/ib_smi.h>
+ #include <rdma/ib_umem_odp.h>
+@@ -3175,6 +3176,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
+       lockdep_assert_held(&mlx5_ib_multiport_mutex);
++      mlx5_core_mp_event_replay(ibdev->mdev,
++                                MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
++                                NULL);
++      mlx5_core_mp_event_replay(mpi->mdev,
++                                MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
++                                NULL);
++
+       mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
+       spin_lock(&port->mp.mpi_lock);
+@@ -3226,6 +3234,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
+                                   struct mlx5_ib_multiport_info *mpi)
+ {
+       u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
++      u64 key;
+       int err;
+       lockdep_assert_held(&mlx5_ib_multiport_mutex);
+@@ -3254,6 +3263,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
+       mlx5_ib_init_cong_debugfs(ibdev, port_num);
++      key = ibdev->ib_dev.index;
++      mlx5_core_mp_event_replay(mpi->mdev,
++                                MLX5_DRIVER_EVENT_AFFILIATION_DONE,
++                                &key);
++      mlx5_core_mp_event_replay(ibdev->mdev,
++                                MLX5_DRIVER_EVENT_AFFILIATION_DONE,
++                                &key);
++
+       return true;
+ unbind:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 15561965d2afa..6ca91c0e8a6a5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -361,6 +361,12 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev)
+ }
+ EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay);
++void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data)
++{
++      mlx5_blocking_notifier_call_chain(dev, event, data);
++}
++EXPORT_SYMBOL(mlx5_core_mp_event_replay);
++
+ int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
+                           enum mlx5_cap_mode cap_mode)
+ {
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
+index 4d5be378fa8cc..26333d602a505 100644
+--- a/include/linux/mlx5/device.h
++++ b/include/linux/mlx5/device.h
+@@ -366,6 +366,8 @@ enum mlx5_driver_event {
+       MLX5_DRIVER_EVENT_UPLINK_NETDEV,
+       MLX5_DRIVER_EVENT_MACSEC_SA_ADDED,
+       MLX5_DRIVER_EVENT_MACSEC_SA_DELETED,
++      MLX5_DRIVER_EVENT_AFFILIATION_DONE,
++      MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ };
+ enum {
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index 3033bbaeac81c..5ca4e085d8133 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -1027,6 +1027,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
+ void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev);
+ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev);
++void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data);
++
+ void mlx5_health_cleanup(struct mlx5_core_dev *dev);
+ int mlx5_health_init(struct mlx5_core_dev *dev);
+ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
+-- 
+2.43.0
+
index ab7f0d064ac9c2a1dbb795f45eae16aadf1fc268..aaeaa930968054d56a41d761baad1d2901f03b1f 100644 (file)
@@ -4,3 +4,53 @@ ksmbd-fix-memory-leak-in-smb2_lock.patch
 efi-x86-avoid-physical-kaslr-on-older-dell-systems.patch
 afs-fix-refcount-underflow-from-error-handling-race.patch
 hid-lenovo-restrict-detection-of-patched-firmware-on.patch
+net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch
+net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch
+net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch
+net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch
+net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch
+net-mlx5e-check-the-number-of-elements-before-walk-t.patch
+rdma-mlx5-send-events-from-ib-driver-about-device-af.patch
+net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch
+net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch
+net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch
+net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch
+net-mlx5e-check-netdev-pointer-before-checking-its-n.patch
+net-mlx5-fix-a-null-vs-is_err-check.patch
+net-ipv6-support-reporting-otherwise-unknown-prefix-.patch
+qca_debug-prevent-crash-on-tx-ring-changes.patch
+qca_debug-fix-ethtool-g-iface-tx-behavior.patch
+qca_spi-fix-reset-behavior.patch
+bnxt_en-clear-resource-reservation-during-resume.patch
+bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch
+bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch
+bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch
+atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch
+atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch
+net-fec-correct-queue-selection.patch
+octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch
+net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch
+octeon_ep-explicitly-test-for-firmware-ready-value.patch
+octeontx2-pf-fix-promisc-mcam-entry-action.patch
+octeontx2-af-update-rss-algorithm-index.patch
+octeontx2-af-fix-pause-frame-configuration.patch
+atm-fix-use-after-free-in-do_vcc_ioctl.patch
+net-rose-fix-use-after-free-in-rose_ioctl.patch
+iavf-introduce-new-state-machines-for-flow-director.patch
+iavf-handle-ntuple-on-off-based-on-new-state-machine.patch
+iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch
+qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch
+net-remove-acked-syn-flag-from-packet-in-the-transmi.patch
+net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch
+net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch
+net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch
+net-ena-fix-xdp-redirection-error.patch
+stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch
+sign-file-fix-incorrect-return-values-check.patch
+vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch
+dpaa2-switch-fix-size-of-the-dma_unmap.patch
+dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch
+net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch
+net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch
+appletalk-fix-use-after-free-in-atalk_ioctl.patch
+net-atlantic-fix-double-free-in-ring-reinit-logic.patch
diff --git a/queue-6.6/sign-file-fix-incorrect-return-values-check.patch b/queue-6.6/sign-file-fix-incorrect-return-values-check.patch
new file mode 100644 (file)
index 0000000..b667e9b
--- /dev/null
@@ -0,0 +1,79 @@
+From 2a479247e48da3c1bc77f8c28f0771e7f78a2730 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Dec 2023 10:31:10 +0000
+Subject: sign-file: Fix incorrect return values check
+
+From: Yusong Gao <a869920004@gmail.com>
+
+[ Upstream commit 829649443e78d85db0cff0c37cadb28fbb1a5f6f ]
+
+There are some wrong return values check in sign-file when call OpenSSL
+API. The ERR() check cond is wrong because of the program only check the
+return value is < 0 which ignored the return val is 0. For example:
+1. CMS_final() return 1 for success or 0 for failure.
+2. i2d_CMS_bio_stream() returns 1 for success or 0 for failure.
+3. i2d_TYPEbio() return 1 for success and 0 for failure.
+4. BIO_free() return 1 for success and 0 for failure.
+
+Link: https://www.openssl.org/docs/manmaster/man3/
+Fixes: e5a2e3c84782 ("scripts/sign-file.c: Add support for signing with a raw signature")
+Signed-off-by: Yusong Gao <a869920004@gmail.com>
+Reviewed-by: Juerg Haefliger <juerg.haefliger@canonical.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Link: https://lore.kernel.org/r/20231213024405.624692-1-a869920004@gmail.com/ # v5
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/sign-file.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/scripts/sign-file.c b/scripts/sign-file.c
+index 598ef5465f825..3edb156ae52c3 100644
+--- a/scripts/sign-file.c
++++ b/scripts/sign-file.c
+@@ -322,7 +322,7 @@ int main(int argc, char **argv)
+                                    CMS_NOSMIMECAP | use_keyid |
+                                    use_signed_attrs),
+                   "CMS_add1_signer");
+-              ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0,
++              ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) != 1,
+                   "CMS_final");
+ #else
+@@ -341,10 +341,10 @@ int main(int argc, char **argv)
+                       b = BIO_new_file(sig_file_name, "wb");
+                       ERR(!b, "%s", sig_file_name);
+ #ifndef USE_PKCS7
+-                      ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0,
++                      ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) != 1,
+                           "%s", sig_file_name);
+ #else
+-                      ERR(i2d_PKCS7_bio(b, pkcs7) < 0,
++                      ERR(i2d_PKCS7_bio(b, pkcs7) != 1,
+                           "%s", sig_file_name);
+ #endif
+                       BIO_free(b);
+@@ -374,9 +374,9 @@ int main(int argc, char **argv)
+       if (!raw_sig) {
+ #ifndef USE_PKCS7
+-              ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name);
++              ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) != 1, "%s", dest_name);
+ #else
+-              ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name);
++              ERR(i2d_PKCS7_bio(bd, pkcs7) != 1, "%s", dest_name);
+ #endif
+       } else {
+               BIO *b;
+@@ -396,7 +396,7 @@ int main(int argc, char **argv)
+       ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name);
+       ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name);
+-      ERR(BIO_free(bd) < 0, "%s", dest_name);
++      ERR(BIO_free(bd) != 1, "%s", dest_name);
+       /* Finally, if we're signing in place, replace the original. */
+       if (replace_orig)
+-- 
+2.43.0
+
diff --git a/queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch b/queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch
new file mode 100644 (file)
index 0000000..abc0dd6
--- /dev/null
@@ -0,0 +1,53 @@
+From 68228ca82a5926205fb815722dc7799ccdf35b44 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 18:33:11 +0800
+Subject: stmmac: dwmac-loongson: Make sure MDIO is initialized before use
+
+From: Yanteng Si <siyanteng@loongson.cn>
+
+[ Upstream commit e87d3a1370ce9f04770d789bcf7cce44865d2e8d ]
+
+Generic code will use mdio. If it is not initialized before use,
+the kernel will Oops.
+
+Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson")
+Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
+Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/stmicro/stmmac/dwmac-loongson.c   | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+index 2cd6fce5c9934..e7701326adc6a 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+@@ -68,17 +68,15 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
+       if (!plat)
+               return -ENOMEM;
++      plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
++                                         sizeof(*plat->mdio_bus_data),
++                                         GFP_KERNEL);
++      if (!plat->mdio_bus_data)
++              return -ENOMEM;
++
+       plat->mdio_node = of_get_child_by_name(np, "mdio");
+       if (plat->mdio_node) {
+               dev_info(&pdev->dev, "Found MDIO subnode\n");
+-
+-              plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
+-                                                 sizeof(*plat->mdio_bus_data),
+-                                                 GFP_KERNEL);
+-              if (!plat->mdio_bus_data) {
+-                      ret = -ENOMEM;
+-                      goto err_put_node;
+-              }
+               plat->mdio_bus_data->needs_reset = true;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch b/queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch
new file mode 100644 (file)
index 0000000..5a4de4b
--- /dev/null
@@ -0,0 +1,41 @@
+From 8d3c8fcaa6274c63657440f6b42281d6d7364201 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Dec 2023 19:23:17 +0300
+Subject: vsock/virtio: Fix unsigned integer wrap around in
+ virtio_transport_has_space()
+
+From: Nikolay Kuratov <kniv@yandex-team.ru>
+
+[ Upstream commit 60316d7f10b17a7ebb1ead0642fee8710e1560e0 ]
+
+We need to do signed arithmetic if we expect condition
+`if (bytes < 0)` to be possible
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE
+
+Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko")
+Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Link: https://lore.kernel.org/r/20231211162317.4116625-1-kniv@yandex-team.ru
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/vmw_vsock/virtio_transport_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
+index 8bc272b6003bb..4084578b0b911 100644
+--- a/net/vmw_vsock/virtio_transport_common.c
++++ b/net/vmw_vsock/virtio_transport_common.c
+@@ -679,7 +679,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk)
+       struct virtio_vsock_sock *vvs = vsk->trans;
+       s64 bytes;
+-      bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
++      bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
+       if (bytes < 0)
+               bytes = 0;
+-- 
+2.43.0
+