--- /dev/null
+From 863676fe1ac1b82fc9eb56c242e80acfbfc18b76 Mon Sep 17 00:00:00 2001
+From: Fenghua Yu <fenghua.yu@intel.com>
+Date: Wed, 12 Jul 2023 12:35:05 -0700
+Subject: dmaengine: idxd: Clear PRS disable flag when disabling IDXD device
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit 863676fe1ac1b82fc9eb56c242e80acfbfc18b76 upstream.
+
+Disabling the IDXD device doesn't reset the Page Request Service (PRS)
+disable flag to its initial value 0. This may cause user confusion,
+because once PRS is disabled the user will see, via the sysfs
+interface, that PRS still retains its previous setting (i.e. disabled)
+even after the device is disabled.
+
+To eliminate this confusion, reset the PRS disable flag to ensure that
+the PRS flag bit reflects the correct state after the device is disabled.
+
+Additionally, simplify the code by setting wq->flags to 0, which clears
+all flag bits, including any future additions.
+
+Fixes: f2dc327131b5 ("dmaengine: idxd: add per wq PRS disable")
+Tested-by: Tony Zhu <tony.zhu@intel.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20230712193505.3440752-1-fenghua.yu@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/idxd/device.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
+index 5abbcc61c528..9a15f0d12c79 100644
+--- a/drivers/dma/idxd/device.c
++++ b/drivers/dma/idxd/device.c
+@@ -384,9 +384,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+ wq->threshold = 0;
+ wq->priority = 0;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
+- clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
+- clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+- clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
++ wq->flags = 0;
+ memset(wq->name, 0, WQ_NAME_SIZE);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
+--
+2.41.0
+
--- /dev/null
+From 0a46781c89dece85386885a407244ca26e5c1c44 Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Wed, 12 Jul 2023 18:26:45 +0530
+Subject: dmaengine: mcf-edma: Fix a potential un-allocated memory access
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+commit 0a46781c89dece85386885a407244ca26e5c1c44 upstream.
+
+When 'mcf_edma' is allocated, some space is allocated for a flexible
+array at the end of the struct: 'chans' items are allocated, that is to
+say 'pdata->dma_channels' of them.
+
+Then, this number of items is stored in 'mcf_edma->n_chans'.
+
+A few lines later, if 'mcf_edma->n_chans' is 0, a default value of 64
+is set.
+
+This ends up with no space allocated by devm_kzalloc() because 'chans'
+was 0, but with 64 items being read and/or written in unallocated
+memory.
+
+Change the logic to set the default value before allocating the memory,
+as illustrated in the sketch below.
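+
+A minimal userspace sketch of the ordering bug (illustrative only; the
+names and the alloc_edma() helper are simplified, not the driver code):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct edma {
+        int n_chans;
+        int chans[];              /* flexible array member */
+    };
+
+    static struct edma *alloc_edma(int chans)
+    {
+        struct edma *e = calloc(1, sizeof(*e) + sizeof(int) * chans);
+
+        if (e)
+            e->n_chans = chans;
+        return e;
+    }
+
+    int main(void)
+    {
+        int pdata_dma_channels = 0;   /* platform data says 0 */
+
+        /* Buggy order: allocate room for 0 items, then bump the count
+         * to 64; chans[0..63] would now be out-of-bounds accesses.
+         */
+        struct edma *bad = alloc_edma(pdata_dma_channels);
+        if (bad && !bad->n_chans)
+            bad->n_chans = 64;
+
+        /* Fixed order: apply the default first, then allocate. */
+        int chans = pdata_dma_channels ? pdata_dma_channels : 64;
+        struct edma *good = alloc_edma(chans);
+
+        printf("bad=%d good=%d\n", bad ? bad->n_chans : -1,
+               good ? good->n_chans : -1);
+        free(bad);
+        free(good);
+        return 0;
+    }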
+
+Fixes: e7a3ff92eaf1 ("dmaengine: fsl-edma: add ColdFire mcf5441x edma support")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Link: https://lore.kernel.org/r/f55d914407c900828f6fad3ea5fa791a5f17b9a4.1685172449.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/mcf-edma.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/dma/mcf-edma.c
++++ b/drivers/dma/mcf-edma.c
+@@ -190,7 +190,13 @@ static int mcf_edma_probe(struct platfor
+ return -EINVAL;
+ }
+
+- chans = pdata->dma_channels;
++ if (!pdata->dma_channels) {
++ dev_info(&pdev->dev, "setting default channel number to 64");
++ chans = 64;
++ } else {
++ chans = pdata->dma_channels;
++ }
++
+ len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+ mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!mcf_edma)
+@@ -202,11 +208,6 @@ static int mcf_edma_probe(struct platfor
+ mcf_edma->drvdata = &mcf_data;
+ mcf_edma->big_endian = 1;
+
+- if (!mcf_edma->n_chans) {
+- dev_info(&pdev->dev, "setting default channel number to 64");
+- mcf_edma->n_chans = 64;
+- }
+-
+ mutex_init(&mcf_edma->fsl_edma_mutex);
+
+ mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0);
--- /dev/null
+From 74d7221c1f9c9f3a8c316a3557ca7dca8b99d14c Mon Sep 17 00:00:00 2001
+From: Zhang Jianhua <chris.zjh@huawei.com>
+Date: Sat, 22 Jul 2023 15:32:44 +0000
+Subject: dmaengine: owl-dma: Modify mismatched function name
+
+From: Zhang Jianhua <chris.zjh@huawei.com>
+
+commit 74d7221c1f9c9f3a8c316a3557ca7dca8b99d14c upstream.
+
+No functional modification involved.
+
+drivers/dma/owl-dma.c:208: warning: expecting prototype for struct owl_dma_pchan. Prototype was for struct owl_dma_vchan instead
+
+Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver")
+Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Link: https://lore.kernel.org/r/20230722153244.2086949-1-chris.zjh@huawei.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/owl-dma.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/dma/owl-dma.c
++++ b/drivers/dma/owl-dma.c
+@@ -192,7 +192,7 @@ struct owl_dma_pchan {
+ };
+
+ /**
+- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
++ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
+ * @vc: wrapped virtual channel
+ * @pchan: the physical channel utilized by this channel
+ * @txd: active transaction on this channel
--- /dev/null
+From 23cc5f667453ca7645a24c8d21bf84dbf61107b2 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:37 -0500
+Subject: ibmvnic: Do partial reset on login failure
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 23cc5f667453ca7645a24c8d21bf84dbf61107b2 upstream.
+
+Perform a partial reset before sending a login request if any of the
+following are true:
+ 1. If a previous request times out. This can be dangerous because the
+    VIOS could still receive the old login request at any point after
+    the timeout. Therefore, it is best to re-register the CRQs and
+    sub-CRQs before retrying.
+ 2. If the previous request returns an error that is not described in
+    PAPR. PAPR provides procedures if the login returns with partial
+    success or aborted return codes (section L.5.1) but other values
+    do not have a defined procedure. Previously, these conditions
+    just returned an error from the login function rather than trying
+    to resolve the issue.
+    This can cause further issues since most callers of the login
+    function are not prepared to handle an error when logging in. This
+    improper cleanup can lead to the device being permanently DOWN'd.
+    For example, if the VIOS believes that the device is already logged
+    in then it will return INVALID_STATE (-7). If we never re-register
+    CRQs then it will always think that the device is already logged
+    in. This leaves the device inoperable.
+
+The partial reset involves freeing the sub-CRQs, freeing the CRQ, then
+registering and initializing a new CRQ and sub-CRQs. This essentially
+restarts all communication with the VIOS to allow for a fresh login
+attempt that will be unhindered by any previous failed attempts.
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-4-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 46 ++++++++++++++++++++++++++++++++-----
+ 1 file changed, 40 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_a
+ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
+ struct ibmvnic_sub_crq_queue *);
+ static int ibmvnic_poll(struct napi_struct *napi, int data);
++static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
++static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
+ static void send_query_map(struct ibmvnic_adapter *adapter);
+ static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
+ static int send_request_unmap(struct ibmvnic_adapter *, u8);
+@@ -1527,11 +1529,9 @@ static int ibmvnic_login(struct net_devi
+
+ if (!wait_for_completion_timeout(&adapter->init_done,
+ timeout)) {
+- netdev_warn(netdev, "Login timed out, retrying...\n");
+- retry = true;
+- adapter->init_done_rc = 0;
+- retry_count++;
+- continue;
++ netdev_warn(netdev, "Login timed out\n");
++ adapter->login_pending = false;
++ goto partial_reset;
+ }
+
+ if (adapter->init_done_rc == ABORTED) {
+@@ -1576,7 +1576,41 @@ static int ibmvnic_login(struct net_devi
+ } else if (adapter->init_done_rc) {
+ netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+ adapter->init_done_rc);
+- return -EIO;
++
++partial_reset:
++ /* adapter login failed, so free any CRQs or sub-CRQs
++ * and register again before attempting to login again.
++ * If we don't do this then the VIOS may think that
++ * we are already logged in and reject any subsequent
++ * attempts
++ */
++ netdev_warn(netdev,
++ "Freeing and re-registering CRQs before attempting to login again\n");
++ retry = true;
++ adapter->init_done_rc = 0;
++ retry_count++;
++ release_sub_crqs(adapter, true);
++ reinit_init_done(adapter);
++ release_crq_queue(adapter);
++ /* If we don't sleep here then we risk an unnecessary
++ * failover event from the VIOS. This is a known VIOS
++ * issue caused by a vnic device freeing and registering
++ * a CRQ too quickly.
++ */
++ msleep(1500);
++ rc = init_crq_queue(adapter);
++ if (rc) {
++ netdev_err(netdev, "login recovery: init CRQ failed %d\n",
++ rc);
++ return -EIO;
++ }
++
++ rc = ibmvnic_reset_init(adapter, false);
++ if (rc) {
++ netdev_err(netdev, "login recovery: Reset init failed %d\n",
++ rc);
++ return -EIO;
++ }
+ }
+ } while (retry);
+
--- /dev/null
+From db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:34 -0500
+Subject: ibmvnic: Enforce stronger sanity checks on login response
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 upstream.
+
+Ensure that all offsets in a login response buffer are within the size
+of the allocated response buffer. Any offsets or lengths that surpass
+the allocation are likely the result of an incomplete response buffer.
+In these cases, a full reset is necessary.
+
+When attempting to login, the ibmvnic device will allocate a response
+buffer and pass a reference to the VIOS. The VIOS will then send the
+ibmvnic device a LOGIN_RSP CRQ to signal that the buffer has been filled
+with data. If the ibmvnic device does not get a response in 20 seconds,
+the old buffer is freed and a new login request is sent. With 2
+outstanding requests, any LOGIN_RSP CRQ could be for the older
+login request. If this is the case then the login response buffer (which
+is for the newer login request) could be incomplete and contain invalid
+data. Therefore, we must enforce strict sanity checks on the response
+buffer values.
+
+Testing has shown that the `off_rxadd_buff_size` value is filled in last
+by the VIOS and will be the smoking gun for these circumstances.
+
+Until VIOS can implement a mechanism for tracking outstanding response
+buffers and a method for mapping a LOGIN_RSP CRQ to a particular login
+response buffer, the best ibmvnic can do in this situation is perform a
+full reset.
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-1-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -5396,6 +5396,7 @@ static int handle_login_rsp(union ibmvni
+ int num_tx_pools;
+ int num_rx_pools;
+ u64 *size_array;
++ u32 rsp_len;
+ int i;
+
+ /* CHECK: Test/set of login_pending does not need to be atomic
+@@ -5447,6 +5448,23 @@ static int handle_login_rsp(union ibmvni
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ return -EIO;
+ }
++
++ rsp_len = be32_to_cpu(login_rsp->len);
++ if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
++ rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
++ rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
++ /* This can happen if a login request times out and there are
++ * 2 outstanding login requests sent, the LOGIN_RSP crq
++ * could have been for the older login request. So we are
++ * parsing the newer response buffer which may be incomplete
++ */
++ dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
++ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
++ return -EIO;
++ }
++
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+ /* variable buffer sizes are not supported, so just read the
--- /dev/null
+From 6db541ae279bd4e76dbd939e5fbf298396166242 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:38 -0500
+Subject: ibmvnic: Ensure login failure recovery is safe from other resets
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 6db541ae279bd4e76dbd939e5fbf298396166242 upstream.
+
+If a login request fails, the recovery process should be protected
+against parallel resets. It is a known issue that freeing and
+registering CRQs in quick succession can result in a failover CRQ from
+the VIOS. Processing a failover during login recovery is dangerous for
+two reasons:
+ 1. This will result in two parallel initialization processes, which can
+    cause serious issues during login.
+ 2. It is possible that the failover CRQ is received but never executed.
+    We get notified of a pending failover through a transport event CRQ.
+    The reset is not performed until an INIT CRQ request is received.
+    Previously, if CRQ initialization failed during login recovery, the
+    ibmvnic IRQ was freed and the login process returned an error. If
+    failover_pending is true (a transport event was received), then the
+    ibmvnic device would never be able to process the reset since it
+    cannot receive the CRQ_INIT request due to the IRQ being freed. This
+    left the device in an inoperable state.
+
+Therefore, the login failure recovery process must be hardened against
+these possible issues. Possible failovers (due to quick CRQ free and
+init) must be avoided and any issues during re-initialization should be
+dealt with instead of being propagated up the stack. This logic is
+similar to that of ibmvnic_probe().
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-5-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 70 +++++++++++++++++++++++++------------
+ 1 file changed, 48 insertions(+), 22 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -116,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer
+ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_long_term_buff *ltb);
+ static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
++static void flush_reset_queue(struct ibmvnic_adapter *adapter);
+
+ struct ibmvnic_stat {
+ char name[ETH_GSTRING_LEN];
+@@ -1507,8 +1508,8 @@ static const char *adapter_state_to_stri
+
+ static int ibmvnic_login(struct net_device *netdev)
+ {
++ unsigned long flags, timeout = msecs_to_jiffies(20000);
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+- unsigned long timeout = msecs_to_jiffies(20000);
+ int retry_count = 0;
+ int retries = 10;
+ bool retry;
+@@ -1573,6 +1574,7 @@ static int ibmvnic_login(struct net_devi
+ "SCRQ irq initialization failed\n");
+ return rc;
+ }
++ /* Default/timeout error handling, reset and start fresh */
+ } else if (adapter->init_done_rc) {
+ netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+ adapter->init_done_rc);
+@@ -1588,29 +1590,53 @@ partial_reset:
+ "Freeing and re-registering CRQs before attempting to login again\n");
+ retry = true;
+ adapter->init_done_rc = 0;
+- retry_count++;
+ release_sub_crqs(adapter, true);
+- reinit_init_done(adapter);
+- release_crq_queue(adapter);
+- /* If we don't sleep here then we risk an unnecessary
+- * failover event from the VIOS. This is a known VIOS
+- * issue caused by a vnic device freeing and registering
+- * a CRQ too quickly.
++ /* Much of this is similar logic as ibmvnic_probe(),
++ * we are essentially re-initializing communication
++ * with the server. We really should not run any
++ * resets/failovers here because this is already a form
++ * of reset and we do not want parallel resets occurring
+ */
+- msleep(1500);
+- rc = init_crq_queue(adapter);
+- if (rc) {
+- netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+- rc);
+- return -EIO;
+- }
+-
+- rc = ibmvnic_reset_init(adapter, false);
+- if (rc) {
+- netdev_err(netdev, "login recovery: Reset init failed %d\n",
+- rc);
+- return -EIO;
+- }
++ do {
++ reinit_init_done(adapter);
++ /* Clear any failovers we got in the previous
++ * pass since we are re-initializing the CRQ
++ */
++ adapter->failover_pending = false;
++ release_crq_queue(adapter);
++ /* If we don't sleep here then we risk an
++ * unnecessary failover event from the VIOS.
++ * This is a known VIOS issue caused by a vnic
++ * device freeing and registering a CRQ too
++ * quickly.
++ */
++ msleep(1500);
++ /* Avoid any resets, since we are currently
++ * resetting.
++ */
++ spin_lock_irqsave(&adapter->rwi_lock, flags);
++ flush_reset_queue(adapter);
++ spin_unlock_irqrestore(&adapter->rwi_lock,
++ flags);
++
++ rc = init_crq_queue(adapter);
++ if (rc) {
++ netdev_err(netdev, "login recovery: init CRQ failed %d\n",
++ rc);
++ return -EIO;
++ }
++
++ rc = ibmvnic_reset_init(adapter, false);
++ if (rc)
++ netdev_err(netdev, "login recovery: Reset init failed %d\n",
++ rc);
++ /* IBMVNIC_CRQ_INIT will return EAGAIN if it
++ * fails, since ibmvnic_reset_init will free
++ * irq's in failure, we won't be able to receive
++ * new CRQs so we need to keep trying. probe()
++ * handles this similarly.
++ */
++ } while (rc == -EAGAIN && retry_count++ < retries);
+ }
+ } while (retry);
+
--- /dev/null
+From d78a671eb8996af19d6311ecdee9790d2fa479f0 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:36 -0500
+Subject: ibmvnic: Handle DMA unmapping of login buffs in release functions
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit d78a671eb8996af19d6311ecdee9790d2fa479f0 upstream.
+
+Rather than leaving the DMA unmapping of the login buffers to the
+login response handler, move this work into the login release functions.
+Previously, these functions were only used for freeing the allocated
+buffers. This could lead to issues if there is more than one
+outstanding login buffer request, which is possible if a login request
+times out.
+
+If a login request times out, then there is another call to send login.
+The send login function makes a call to the login buffer release
+function. In the past, this freed the buffers but did not DMA unmap
+them. Therefore, the VIOS could still write to the old login (now
+freed) buffer. It is for this reason that it is a good idea to leave
+the DMA unmap call to the login buffer release functions.
+
+Since the login buffer release functions now handle DMA unmapping,
+remove the duplicate DMA unmapping in handle_login_rsp().
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-3-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -1588,12 +1588,22 @@ static int ibmvnic_login(struct net_devi
+
+ static void release_login_buffer(struct ibmvnic_adapter *adapter)
+ {
++ if (!adapter->login_buf)
++ return;
++
++ dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
++ adapter->login_buf_sz, DMA_TO_DEVICE);
+ kfree(adapter->login_buf);
+ adapter->login_buf = NULL;
+ }
+
+ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+ {
++ if (!adapter->login_rsp_buf)
++ return;
++
++ dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
++ adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+ kfree(adapter->login_rsp_buf);
+ adapter->login_rsp_buf = NULL;
+ }
+@@ -5411,11 +5421,6 @@ static int handle_login_rsp(union ibmvni
+ }
+ adapter->login_pending = false;
+
+- dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
+- DMA_TO_DEVICE);
+- dma_unmap_single(dev, adapter->login_rsp_buf_token,
+- adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+-
+ /* If the number of queues requested can't be allocated by the
+ * server, the login response will return with code 1. We will need
+ * to resend the login buffer with fewer queues requested.
--- /dev/null
+From 411c565b4bc63e9584a8493882bd566e35a90588 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:35 -0500
+Subject: ibmvnic: Unmap DMA login rsp buffer on send login fail
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 411c565b4bc63e9584a8493882bd566e35a90588 upstream.
+
+If the LOGIN CRQ fails to send, then we must DMA unmap the response
+buffer. Previously, if the CRQ failed, the memory was freed without
+being DMA unmapped.
+
+Fixes: c98d9cc4170d ("ibmvnic: send_login should check for crq errors")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-2-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -4830,11 +4830,14 @@ static int send_login(struct ibmvnic_ada
+ if (rc) {
+ adapter->login_pending = false;
+ netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+- goto buf_rsp_map_failed;
++ goto buf_send_failed;
+ }
+
+ return 0;
+
++buf_send_failed:
++ dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
++ DMA_FROM_DEVICE);
+ buf_rsp_map_failed:
+ kfree(login_rsp_buffer);
+ adapter->login_rsp_buf = NULL;
--- /dev/null
+From a94c16a2fda010866b8858a386a8bfbeba4f72c5 Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Thu, 3 Aug 2023 16:42:53 +0300
+Subject: net: dsa: ocelot: call dsa_tag_8021q_unregister() under rtnl_lock() on driver remove
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+commit a94c16a2fda010866b8858a386a8bfbeba4f72c5 upstream.
+
+When the tagging protocol in current use is "ocelot-8021q" and we unbind
+the driver, we see this splat:
+
+$ echo '0000:00:00.2' > /sys/bus/pci/drivers/fsl_enetc/unbind
+mscc_felix 0000:00:00.5 swp0: left promiscuous mode
+sja1105 spi2.0: Link is Down
+DSA: tree 1 torn down
+mscc_felix 0000:00:00.5 swp2: left promiscuous mode
+sja1105 spi2.2: Link is Down
+DSA: tree 3 torn down
+fsl_enetc 0000:00:00.2 eno2: left promiscuous mode
+mscc_felix 0000:00:00.5: Link is Down
+------------[ cut here ]------------
+RTNL: assertion failed at net/dsa/tag_8021q.c (409)
+WARNING: CPU: 1 PID: 329 at net/dsa/tag_8021q.c:409 dsa_tag_8021q_unregister+0x12c/0x1a0
+Modules linked in:
+CPU: 1 PID: 329 Comm: bash Not tainted 6.5.0-rc3+ #771
+pc : dsa_tag_8021q_unregister+0x12c/0x1a0
+lr : dsa_tag_8021q_unregister+0x12c/0x1a0
+Call trace:
+ dsa_tag_8021q_unregister+0x12c/0x1a0
+ felix_tag_8021q_teardown+0x130/0x150
+ felix_teardown+0x3c/0xd8
+ dsa_tree_teardown_switches+0xbc/0xe0
+ dsa_unregister_switch+0x168/0x260
+ felix_pci_remove+0x30/0x60
+ pci_device_remove+0x4c/0x100
+ device_release_driver_internal+0x188/0x288
+ device_links_unbind_consumers+0xfc/0x138
+ device_release_driver_internal+0xe0/0x288
+ device_driver_detach+0x24/0x38
+ unbind_store+0xd8/0x108
+ drv_attr_store+0x30/0x50
+---[ end trace 0000000000000000 ]---
+------------[ cut here ]------------
+RTNL: assertion failed at net/8021q/vlan_core.c (376)
+WARNING: CPU: 1 PID: 329 at net/8021q/vlan_core.c:376 vlan_vid_del+0x1b8/0x1f0
+CPU: 1 PID: 329 Comm: bash Tainted: G W 6.5.0-rc3+ #771
+pc : vlan_vid_del+0x1b8/0x1f0
+lr : vlan_vid_del+0x1b8/0x1f0
+ dsa_tag_8021q_unregister+0x8c/0x1a0
+ felix_tag_8021q_teardown+0x130/0x150
+ felix_teardown+0x3c/0xd8
+ dsa_tree_teardown_switches+0xbc/0xe0
+ dsa_unregister_switch+0x168/0x260
+ felix_pci_remove+0x30/0x60
+ pci_device_remove+0x4c/0x100
+ device_release_driver_internal+0x188/0x288
+ device_links_unbind_consumers+0xfc/0x138
+ device_release_driver_internal+0xe0/0x288
+ device_driver_detach+0x24/0x38
+ unbind_store+0xd8/0x108
+ drv_attr_store+0x30/0x50
+DSA: tree 0 torn down
+
+This was not so easy to spot, because "ocelot-8021q" is not the default
+tagging protocol, and thus not everyone who tests the unbinding path
+may have switched to it beforehand. The default
+felix_tag_npi_teardown() does not require rtnl_lock() to be held.
+
+Fixes: 7c83a7c539ab ("net: dsa: add a second tagger for Ocelot switches based on tag_8021q")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20230803134253.2711124-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/ocelot/felix.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/dsa/ocelot/felix.c
++++ b/drivers/net/dsa/ocelot/felix.c
+@@ -1625,8 +1625,10 @@ static void felix_teardown(struct dsa_sw
+ struct felix *felix = ocelot_to_felix(ocelot);
+ struct dsa_port *dp;
+
++ rtnl_lock();
+ if (felix->tag_proto_ops)
+ felix->tag_proto_ops->teardown(ds);
++ rtnl_unlock();
+
+ dsa_switch_for_each_available_port(dp, ds)
+ ocelot_deinit_port(ocelot, dp->index);
--- /dev/null
+From f0168042a21292d20007d24ab2e4fc32f79ebf11 Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Thu, 3 Aug 2023 16:58:57 +0300
+Subject: net: enetc: reimplement RFS/RSS memory clearing as PCI quirk
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+commit f0168042a21292d20007d24ab2e4fc32f79ebf11 upstream.
+
+The workaround implemented in commit 3222b5b613db ("net: enetc:
+initialize RFS/RSS memories for unused ports too") is no longer
+effective after commit 6fffbc7ae137 ("PCI: Honor firmware's device
+disabled status"). Thus, it has introduced a regression and we see AER
+errors being reported again:
+
+$ ip link set sw2p0 up && dhclient -i sw2p0 && ip addr show sw2p0
+fsl_enetc 0000:00:00.2 eno2: configuring for fixed/internal link mode
+fsl_enetc 0000:00:00.2 eno2: Link is Up - 2.5Gbps/Full - flow control rx/tx
+mscc_felix 0000:00:00.5 swp2: configuring for fixed/sgmii link mode
+mscc_felix 0000:00:00.5 swp2: Link is Up - 1Gbps/Full - flow control off
+sja1105 spi2.2 sw2p0: configuring for phy/rgmii-id link mode
+sja1105 spi2.2 sw2p0: Link is Up - 1Gbps/Full - flow control off
+pcieport 0000:00:1f.0: AER: Multiple Corrected error received: 0000:00:00.0
+pcieport 0000:00:1f.0: AER: can't find device of ID0000
+
+Rob's suggestion is to reimplement the enetc driver workaround as a
+PCI fixup, and to modify the PCI core to run the fixups for all PCI
+functions. This change handles the first part.
+
+We refactor the common code in enetc_psi_create() and enetc_psi_destroy(),
+and use the PCI fixup only for those functions for which enetc_pf_probe()
+won't get called. This avoids some work being done twice for the PFs
+which are enabled.
+
+Fixes: 6fffbc7ae137 ("PCI: Honor firmware's device disabled status")
+Link: https://lore.kernel.org/netdev/CAL_JsqLsVYiPLx2kcHkDQ4t=hQVCR7NHziDwi9cCFUFhx48Qow@mail.gmail.com/
+Suggested-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc_pf.c | 103 +++++++++++++++++-------
+ 1 file changed, 73 insertions(+), 30 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+@@ -1222,50 +1222,81 @@ static int enetc_pf_register_with_ierb(s
+ return enetc_ierb_register_pf(ierb_pdev, pdev);
+ }
+
+-static int enetc_pf_probe(struct pci_dev *pdev,
+- const struct pci_device_id *ent)
++static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
+ {
+- struct device_node *node = pdev->dev.of_node;
+- struct enetc_ndev_priv *priv;
+- struct net_device *ndev;
+ struct enetc_si *si;
+- struct enetc_pf *pf;
+ int err;
+
+- err = enetc_pf_register_with_ierb(pdev);
+- if (err == -EPROBE_DEFER)
+- return err;
+- if (err)
+- dev_warn(&pdev->dev,
+- "Could not register with IERB driver: %pe, please update the device tree\n",
+- ERR_PTR(err));
+-
+- err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
+- if (err)
+- return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
++ err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
++ if (err) {
++ dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
++ goto out;
++ }
+
+ si = pci_get_drvdata(pdev);
+ if (!si->hw.port || !si->hw.global) {
+ err = -ENODEV;
+ dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
+- goto err_map_pf_space;
++ goto out_pci_remove;
+ }
+
+ err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+ &si->cbd_ring);
+ if (err)
+- goto err_setup_cbdr;
++ goto out_pci_remove;
+
+ err = enetc_init_port_rfs_memory(si);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
+- goto err_init_port_rfs;
++ goto out_teardown_cbdr;
+ }
+
+ err = enetc_init_port_rss_memory(si);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
+- goto err_init_port_rss;
++ goto out_teardown_cbdr;
++ }
++
++ return si;
++
++out_teardown_cbdr:
++ enetc_teardown_cbdr(&si->cbd_ring);
++out_pci_remove:
++ enetc_pci_remove(pdev);
++out:
++ return ERR_PTR(err);
++}
++
++static void enetc_psi_destroy(struct pci_dev *pdev)
++{
++ struct enetc_si *si = pci_get_drvdata(pdev);
++
++ enetc_teardown_cbdr(&si->cbd_ring);
++ enetc_pci_remove(pdev);
++}
++
++static int enetc_pf_probe(struct pci_dev *pdev,
++ const struct pci_device_id *ent)
++{
++ struct device_node *node = pdev->dev.of_node;
++ struct enetc_ndev_priv *priv;
++ struct net_device *ndev;
++ struct enetc_si *si;
++ struct enetc_pf *pf;
++ int err;
++
++ err = enetc_pf_register_with_ierb(pdev);
++ if (err == -EPROBE_DEFER)
++ return err;
++ if (err)
++ dev_warn(&pdev->dev,
++ "Could not register with IERB driver: %pe, please update the device tree\n",
++ ERR_PTR(err));
++
++ si = enetc_psi_create(pdev);
++ if (IS_ERR(si)) {
++ err = PTR_ERR(si);
++ goto err_psi_create;
+ }
+
+ if (node && !of_device_is_available(node)) {
+@@ -1353,15 +1384,10 @@ err_alloc_si_res:
+ si->ndev = NULL;
+ free_netdev(ndev);
+ err_alloc_netdev:
+-err_init_port_rss:
+-err_init_port_rfs:
+ err_device_disabled:
+ err_setup_mac_addresses:
+- enetc_teardown_cbdr(&si->cbd_ring);
+-err_setup_cbdr:
+-err_map_pf_space:
+- enetc_pci_remove(pdev);
+-
++ enetc_psi_destroy(pdev);
++err_psi_create:
+ return err;
+ }
+
+@@ -1384,12 +1410,29 @@ static void enetc_pf_remove(struct pci_d
+ enetc_free_msix(priv);
+
+ enetc_free_si_resources(priv);
+- enetc_teardown_cbdr(&si->cbd_ring);
+
+ free_netdev(si->ndev);
+
+- enetc_pci_remove(pdev);
++ enetc_psi_destroy(pdev);
++}
++
++static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
++{
++ struct device_node *node = pdev->dev.of_node;
++ struct enetc_si *si;
++
++ /* Only apply quirk for disabled functions. For the ones
++ * that are enabled, enetc_pf_probe() will apply it.
++ */
++ if (node && of_device_is_available(node))
++ return;
++
++ si = enetc_psi_create(pdev);
++ if (si)
++ enetc_psi_destroy(pdev);
+ }
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
++ enetc_fixup_clear_rss_rfs);
+
+ static const struct pci_device_id enetc_pf_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
--- /dev/null
+From 6265e242f7b95f2c1195b42ec912b84ad161470e Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:51 +0800
+Subject: net: hns3: add wait until mac link down
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 6265e242f7b95f2c1195b42ec912b84ad161470e upstream.
+
+In some configuration flows of the hns3 driver, for example changing
+the MTU, the driver disables the MAC through firmware before the
+configuration. But the firmware disables the MAC asynchronously, so the
+RX traffic may not be stopped yet in this case.
+
+Fix it by waiting until the MAC link is down.
+
+Fixes: a9775bb64aa7 ("net: hns3: fix set and get link ksettings issue")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-4-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -7569,6 +7569,8 @@ static void hclge_enable_fd(struct hnae3
+
+ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+ {
++#define HCLGE_LINK_STATUS_WAIT_CNT 3
++
+ struct hclge_desc desc;
+ struct hclge_config_mac_mode_cmd *req =
+ (struct hclge_config_mac_mode_cmd *)desc.data;
+@@ -7593,9 +7595,15 @@ static void hclge_cfg_mac_mode(struct hc
+ req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+- if (ret)
++ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac enable fail, ret =%d.\n", ret);
++ return;
++ }
++
++ if (!enable)
++ hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
++ HCLGE_LINK_STATUS_WAIT_CNT);
+ }
+
+ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
--- /dev/null
+From ac6257a3ae5db5193b1f19c268e4f72d274ddb88 Mon Sep 17 00:00:00 2001
+From: Yonglong Liu <liuyonglong@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:52 +0800
+Subject: net: hns3: fix deadlock issue when externel_lb and reset are executed together
+
+From: Yonglong Liu <liuyonglong@huawei.com>
+
+commit ac6257a3ae5db5193b1f19c268e4f72d274ddb88 upstream.
+
+When external_lb and reset are executed together, a deadlock may
+occur:
+[ 3147.217009] INFO: task kworker/u321:0:7 blocked for more than 120 seconds.
+[ 3147.230483] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 3147.238999] task:kworker/u321:0 state:D stack: 0 pid: 7 ppid: 2 flags:0x00000008
+[ 3147.248045] Workqueue: hclge hclge_service_task [hclge]
+[ 3147.253957] Call trace:
+[ 3147.257093] __switch_to+0x7c/0xbc
+[ 3147.261183] __schedule+0x338/0x6f0
+[ 3147.265357] schedule+0x50/0xe0
+[ 3147.269185] schedule_preempt_disabled+0x18/0x24
+[ 3147.274488] __mutex_lock.constprop.0+0x1d4/0x5dc
+[ 3147.279880] __mutex_lock_slowpath+0x1c/0x30
+[ 3147.284839] mutex_lock+0x50/0x60
+[ 3147.288841] rtnl_lock+0x20/0x2c
+[ 3147.292759] hclge_reset_prepare+0x68/0x90 [hclge]
+[ 3147.298239] hclge_reset_subtask+0x88/0xe0 [hclge]
+[ 3147.303718] hclge_reset_service_task+0x84/0x120 [hclge]
+[ 3147.309718] hclge_service_task+0x2c/0x70 [hclge]
+[ 3147.315109] process_one_work+0x1d0/0x490
+[ 3147.319805] worker_thread+0x158/0x3d0
+[ 3147.324240] kthread+0x108/0x13c
+[ 3147.328154] ret_from_fork+0x10/0x18
+
+In the external loopback (external_lb) process, the hns3 driver calls
+napi_disable() first. If the reset happens at that point, the restore
+step of external_lb will fail and napi_enable() will not be called.
+When external_lb is executed again, napi_disable() is called twice,
+causing a deadlock on rtnl_lock().
+
+This patch uses the HNS3_NIC_STATE_DOWN state to protect the calls to
+napi_disable() and napi_enable() in the external_lb process, just as is
+done in ndo_stop() and ndo_start(); see the sketch below.
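+
+A minimal userspace sketch of the guard pattern (assumed semantics,
+hypothetical names; not the driver code): the first caller to set the
+DOWN flag wins, so an interrupted prepare/restore sequence cannot lead
+to a second napi_disable().
+
+    #include <stdio.h>
+    #include <stdatomic.h>
+    #include <stdbool.h>
+
+    static atomic_flag down = ATOMIC_FLAG_INIT;
+
+    static void lb_prepare(void)
+    {
+        /* analogous to test_and_set_bit(HNS3_NIC_STATE_DOWN, ...) */
+        if (atomic_flag_test_and_set(&down)) {
+            printf("prepare: already down, skip napi_disable()\n");
+            return;
+        }
+        printf("prepare: napi_disable()\n");
+    }
+
+    static void lb_restore(bool resetting)
+    {
+        if (resetting)
+            return;               /* the reset path owns recovery now */
+        printf("restore: napi_enable()\n");
+        atomic_flag_clear(&down); /* analogous to clear_bit(...) */
+    }
+
+    int main(void)
+    {
+        lb_prepare();
+        lb_restore(true);  /* a reset interrupts the loopback test */
+        lb_prepare();      /* second run: no double napi_disable() */
+        return 0;
+    }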
+
+Fixes: 04b6ba143521 ("net: hns3: add support for external loopback test")
+Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-5-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net
+ if (!if_running)
+ return;
+
++ if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++ return;
++
+ netif_carrier_off(ndev);
+ netif_tx_disable(ndev);
+
+@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net
+ if (!if_running)
+ return;
+
+- hns3_nic_reset_all_ring(priv->ae_handle);
++ if (hns3_nic_resetting(ndev))
++ return;
++
++ if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++ return;
++
++ if (hns3_nic_reset_all_ring(priv->ae_handle))
++ return;
++
++ clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+
+ for (i = 0; i < priv->vector_num; i++)
+ hns3_vector_enable(&priv->tqp_vector[i]);
--- /dev/null
+From 5e3d20617b055e725e785e0058426368269949f3 Mon Sep 17 00:00:00 2001
+From: Hao Chen <chenhao418@huawei.com>
+Date: Wed, 9 Aug 2023 10:09:02 +0800
+Subject: net: hns3: fix strscpy causing content truncation issue
+
+From: Hao Chen <chenhao418@huawei.com>
+
+commit 5e3d20617b055e725e785e0058426368269949f3 upstream.
+
+hns3_dbg_fill_content()/hclge_dbg_fill_content() aims to integrate some
+items into a string for the content, and we add '\n' and '\0' in the
+last two bytes of the content.
+
+strscpy() will add '\0' in the last byte of the destination buffer (one
+of the items), which results in the content print finishing early and
+some of the dump content being truncated.
+
+An error log is shown below:
+cat mac_list/uc
+UC MAC_LIST:
+
+Expected:
+UC MAC_LIST:
+FUNC_ID MAC_ADDR STATE
+pf 00:2b:19:05:03:00 ACTIVE
+
+The destination buffer is length-bounded and not required to be
+NUL-terminated, so just change strscpy() to memcpy() to fix it.
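+
+A minimal userspace illustration (the buffer layout is assumed and
+simplified from the driver; strscpy_like() emulates kernel strscpy()'s
+termination behaviour): strscpy(dst, src, size) always NUL-terminates
+within 'size', so calling it with size == strlen(src) plants a '\0' in
+the middle of the pre-padded line and printing stops there, while
+memcpy() copies only the bytes and leaves the padding and the trailing
+"\n\0" intact.
+
+    #include <stdio.h>
+    #include <string.h>
+
+    #define ITEM_LEN 12
+
+    /* emulates kernel strscpy()'s termination for this demo */
+    static void strscpy_like(char *dst, const char *src, size_t size)
+    {
+        size_t n = strlen(src) >= size ? size - 1 : strlen(src);
+
+        memcpy(dst, src, n);
+        dst[n] = '\0';            /* the problematic terminator */
+    }
+
+    static void fill(char *line, size_t len, int use_memcpy)
+    {
+        const char *items[] = { "FUNC_ID", "STATE" };
+        char *pos = line;
+        int i;
+
+        memset(line, ' ', len);
+        line[len - 2] = '\n';
+        line[len - 1] = '\0';
+        for (i = 0; i < 2; i++) {
+            if (use_memcpy)
+                memcpy(pos, items[i], strlen(items[i]));
+            else
+                strscpy_like(pos, items[i], strlen(items[i]));
+            pos += ITEM_LEN;
+        }
+    }
+
+    int main(void)
+    {
+        char line[2 * ITEM_LEN + 2];
+
+        fill(line, sizeof(line), 0);
+        printf("strscpy: [%s]\n", line);  /* truncated after "FUNC_I" */
+        fill(line, sizeof(line), 1);
+        printf("memcpy:  [%s]", line);    /* full padded line */
+        return 0;
+    }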
+
+Fixes: 1cf3d5567f27 ("net: hns3: fix strncpy() not using dest-buf length as length issue")
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Link: https://lore.kernel.org/r/20230809020902.1941471-1-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 4 ++--
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -461,9 +461,9 @@ static void hns3_dbg_fill_content(char *
+ if (result) {
+ if (item_len < strlen(result[i]))
+ break;
+- strscpy(pos, result[i], strlen(result[i]));
++ memcpy(pos, result[i], strlen(result[i]));
+ } else {
+- strscpy(pos, items[i].name, strlen(items[i].name));
++ memcpy(pos, items[i].name, strlen(items[i].name));
+ }
+ pos += item_len;
+ len -= item_len;
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char
+ if (result) {
+ if (item_len < strlen(result[i]))
+ break;
+- strscpy(pos, result[i], strlen(result[i]));
++ memcpy(pos, result[i], strlen(result[i]));
+ } else {
+- strscpy(pos, items[i].name, strlen(items[i].name));
++ memcpy(pos, items[i].name, strlen(items[i].name));
+ }
+ pos += item_len;
+ len -= item_len;
--- /dev/null
+From 08469dacfad25428b66549716811807203744f4f Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:50 +0800
+Subject: net: hns3: refactor hclge_mac_link_status_wait for interface reuse
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 08469dacfad25428b66549716811807203744f4f upstream.
+
+Some NIC configurations can only be performed after the link is down,
+so this patch refactors this API for reuse.
+
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-3-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struc
+ static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+ static void hclge_sync_fd_table(struct hclge_dev *hdev);
+ static void hclge_update_fec_stats(struct hclge_dev *hdev);
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++ int wait_cnt);
+
+ static struct hnae3_ae_algo ae_algo;
+
+@@ -7656,10 +7658,9 @@ static void hclge_phy_link_status_wait(s
+ } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
+ }
+
+-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++ int wait_cnt)
+ {
+-#define HCLGE_MAC_LINK_STATUS_NUM 100
+-
+ int link_status;
+ int i = 0;
+ int ret;
+@@ -7672,13 +7673,15 @@ static int hclge_mac_link_status_wait(st
+ return 0;
+
+ msleep(HCLGE_LINK_STATUS_MS);
+- } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
++ } while (++i < wait_cnt);
+ return -EBUSY;
+ }
+
+ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+ bool is_phy)
+ {
++#define HCLGE_MAC_LINK_STATUS_NUM 100
++
+ int link_ret;
+
+ link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
+@@ -7686,7 +7689,8 @@ static int hclge_mac_phy_link_status_wai
+ if (is_phy)
+ hclge_phy_link_status_wait(hdev, link_ret);
+
+- return hclge_mac_link_status_wait(hdev, link_ret);
++ return hclge_mac_link_status_wait(hdev, link_ret,
++ HCLGE_MAC_LINK_STATUS_NUM);
+ }
+
+ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
--- /dev/null
+From 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f Mon Sep 17 00:00:00 2001
+From: Daniel Jurgens <danielj@nvidia.com>
+Date: Tue, 11 Jul 2023 00:28:10 +0300
+Subject: net/mlx5: Allow 0 for total host VFs
+
+From: Daniel Jurgens <danielj@nvidia.com>
+
+commit 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f upstream.
+
+When querying eswitch functions, 0 is a valid number of host VFs. After
+introducing ARM SRIOV, falling through to getting the max value from
+PCI results in using the total VFs allowed on the ARM for the host.
+
+Fixes: 86eec50beaf3 ("net/mlx5: Support querying max VFs from device")
+Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+@@ -264,8 +264,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+ kvfree(out);
+- if (host_total_vfs)
+- return host_total_vfs;
++ return host_total_vfs;
+ }
+
+ done:
--- /dev/null
+From 8bfe1e19fb96d89fce14302e35cba0cd9f39d0a1 Mon Sep 17 00:00:00 2001
+From: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Date: Wed, 26 Jul 2023 14:38:03 +0300
+Subject: net/mlx5: DR, Fix wrong allocation of modify hdr pattern
+
+From: Yevgeny Kliteynik <kliteyn@nvidia.com>
+
+commit 8bfe1e19fb96d89fce14302e35cba0cd9f39d0a1 upstream.
+
+Fix the wrong calculation of the modify hdr pattern size, where the
+previously calculated size was not enough to accommodate the required
+number of actions; see the worked example below.
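+
+A worked example with illustrative numbers: for num_of_actions = 5,
+
+    old: chunk_size = ilog2(5)                     = 2  ->  2^2 = 4 < 5
+    new: chunk_size = ilog2(roundup_pow_of_two(5)) = 3  ->  2^3 = 8 >= 5
+
+i.e. plain ilog2() rounds down, so any non-power-of-two action count
+got a chunk one size too small.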
+
+Fixes: da5d0027d666 ("net/mlx5: DR, Add cache for modify header pattern")
+Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Reviewed-by: Erez Shitrit <erezsh@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
+index d6947fe13d56..8ca534ef5d03 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
+@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
+ u32 chunk_size;
+ u32 index;
+
+- chunk_size = ilog2(num_of_actions);
++ chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
+ /* HW modify action index granularity is at least 64B */
+ chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
+
+--
+2.41.0
+
--- /dev/null
+From 86ed7b773c01ba71617538b3b107c33fd9cf90b8 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Sun, 30 Jul 2023 09:26:27 +0300
+Subject: net/mlx5: LAG, Check correct bucket when modifying LAG
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 86ed7b773c01ba71617538b3b107c33fd9cf90b8 upstream.
+
+The cited patch introduced buckets in hash mode, but missed updating
+the ports/buckets check when modifying the LAG.
+Fix the check.
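+
+An illustrative layout (hypothetical sizes): with ldev->ports = 2 and
+ldev->buckets = 2, v2p_map[] holds one entry per (port, bucket) pair:
+
+    idx = i * ldev->buckets + j
+    (i=0,j=0) -> 0   (i=0,j=1) -> 1   (i=1,j=0) -> 2   (i=1,j=1) -> 3
+
+Comparing v2p_map[i] == ports[i] only ever looks at entries 0 and 1
+(each twice) and never at 2 and 3; the fixed check compares
+v2p_map[idx] == ports[idx] for every bucket.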
+
+Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_de
+ for (i = 0; i < ldev->ports; i++) {
+ for (j = 0; j < ldev->buckets; j++) {
+ idx = i * ldev->buckets + j;
+- if (ldev->v2p_map[i] == ports[i])
++ if (ldev->v2p_map[idx] == ports[idx])
+ continue;
+
+ dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
--- /dev/null
+From aab8e1a200b926147db51e3f82fd07bb9edf6a98 Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Sun, 23 Jul 2023 11:03:01 +0300
+Subject: net/mlx5: Reload auxiliary devices in pci error handlers
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit aab8e1a200b926147db51e3f82fd07bb9edf6a98 upstream.
+
+Handling PCI errors should fully tear down and load back the auxiliary
+devices, the same as is done through the mlx5 health recovery flow.
+
+Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1845,7 +1845,7 @@ static pci_ers_result_t mlx5_pci_err_det
+
+ mlx5_enter_error_state(dev, false);
+ mlx5_error_sw_reset(dev);
+- mlx5_unload_one(dev, true);
++ mlx5_unload_one(dev, false);
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
+
--- /dev/null
+From d006207625657322ba8251b6e7e829f9659755dc Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Wed, 19 Jul 2023 11:33:44 +0300
+Subject: net/mlx5: Skip clock update work when device is in error state
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit d006207625657322ba8251b6e7e829f9659755dc upstream.
+
+When the device is in an error state, marked by the flag
+MLX5_DEVICE_STATE_INTERNAL_ERROR, the HW and PCI may not be accessible,
+so the clock update work should be skipped. Furthermore, such access
+through PCI in the error state, after calling mlx5_pci_disable_device(),
+can result in failing to recover from PCI errors.
+
+Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support")
+Reported-and-tested-by: Ganesh G R <ganeshgr@linux.ibm.com>
+Closes: https://lore.kernel.org/netdev/9bdb9b9d-140a-7a28-f0de-2e64e873c068@nvidia.com
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+@@ -221,10 +221,15 @@ static void mlx5_timestamp_overflow(stru
+ clock = container_of(timer, struct mlx5_clock, timer);
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
++ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++ goto out;
++
+ write_seqlock_irqsave(&clock->lock, flags);
+ timecounter_read(&timer->tc);
+ mlx5_update_clock_info_page(mdev);
+ write_sequnlock_irqrestore(&clock->lock, flags);
++
++out:
+ schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+ }
+
--- /dev/null
+From 72cc654970658e88a1cdea08f06b11c218efa4da Mon Sep 17 00:00:00 2001
+From: Gal Pressman <gal@nvidia.com>
+Date: Sun, 16 Jul 2023 14:28:10 +0300
+Subject: net/mlx5e: Take RTNL lock when needed before calling xdp_set_features()
+
+From: Gal Pressman <gal@nvidia.com>
+
+commit 72cc654970658e88a1cdea08f06b11c218efa4da upstream.
+
+Hold RTNL lock when calling xdp_set_features() with a registered netdev,
+as the call triggers the netdev notifiers. This could happen when
+switching from uplink rep to nic profile for example.
+
+This resolves the following call trace:
+
+RTNL: assertion failed at net/core/dev.c (1953)
+WARNING: CPU: 6 PID: 112670 at net/core/dev.c:1953 call_netdevice_notifiers_info+0x7c/0x80
+Modules linked in: sch_mqprio sch_mqprio_lib act_tunnel_key act_mirred act_skbedit cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress bonding ib_umad ip_gre rdma_ucm mlx5_vfio_pci ipip tunnel4 ip6_gre gre mlx5_ib vfio_pci vfio_pci_core vfio_iommu_type1 ib_uverbs vfio mlx5_core ib_ipoib geneve nf_tables ip6_tunnel tunnel6 iptable_raw openvswitch nsh rpcrdma ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: ib_uverbs]
+CPU: 6 PID: 112670 Comm: devlink Not tainted 6.4.0-rc7_for_upstream_min_debug_2023_06_28_17_02 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+RIP: 0010:call_netdevice_notifiers_info+0x7c/0x80
+Code: 90 ff 80 3d 2d 6b f7 00 00 75 c5 ba a1 07 00 00 48 c7 c6 e4 ce 0b 82 48 c7 c7 c8 f4 04 82 c6 05 11 6b f7 00 01 e8 a4 7c 8e ff <0f> 0b eb a2 0f 1f 44 00 00 55 48 89 e5 41 54 48 83 e4 f0 48 83 ec
+RSP: 0018:ffff8882a21c3948 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: ffffffff82e6f880 RCX: 0000000000000027
+RDX: ffff88885f99b5c8 RSI: 0000000000000001 RDI: ffff88885f99b5c0
+RBP: 0000000000000028 R08: ffff88887ffabaa8 R09: 0000000000000003
+R10: ffff88887fecbac0 R11: ffff88887ff7bac0 R12: ffff8882a21c3968
+R13: ffff88811c018940 R14: 0000000000000000 R15: ffff8881274401a0
+FS: 00007fe141c81800(0000) GS:ffff88885f980000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f787c28b948 CR3: 000000014bcf3005 CR4: 0000000000370ea0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ ? __warn+0x79/0x120
+ ? call_netdevice_notifiers_info+0x7c/0x80
+ ? report_bug+0x17c/0x190
+ ? handle_bug+0x3c/0x60
+ ? exc_invalid_op+0x14/0x70
+ ? asm_exc_invalid_op+0x16/0x20
+ ? call_netdevice_notifiers_info+0x7c/0x80
+ ? call_netdevice_notifiers_info+0x7c/0x80
+ call_netdevice_notifiers+0x2e/0x50
+ mlx5e_set_xdp_feature+0x21/0x50 [mlx5_core]
+ mlx5e_nic_init+0xf1/0x1a0 [mlx5_core]
+ mlx5e_netdev_init_profile+0x76/0x110 [mlx5_core]
+ mlx5e_netdev_attach_profile+0x1f/0x90 [mlx5_core]
+ mlx5e_netdev_change_profile+0x92/0x160 [mlx5_core]
+ mlx5e_netdev_attach_nic_profile+0x1b/0x30 [mlx5_core]
+ mlx5e_vport_rep_unload+0xaa/0xc0 [mlx5_core]
+ __esw_offloads_unload_rep+0x52/0x60 [mlx5_core]
+ mlx5_esw_offloads_rep_unload+0x52/0x70 [mlx5_core]
+ esw_offloads_unload_rep+0x34/0x70 [mlx5_core]
+ esw_offloads_disable+0x2b/0x90 [mlx5_core]
+ mlx5_eswitch_disable_locked+0x1b9/0x210 [mlx5_core]
+ mlx5_devlink_eswitch_mode_set+0xf5/0x630 [mlx5_core]
+ ? devlink_get_from_attrs_lock+0x9e/0x110
+ devlink_nl_cmd_eswitch_set_doit+0x60/0xe0
+ genl_family_rcv_msg_doit.isra.0+0xc2/0x110
+ genl_rcv_msg+0x17d/0x2b0
+ ? devlink_get_from_attrs_lock+0x110/0x110
+ ? devlink_nl_cmd_eswitch_get_doit+0x290/0x290
+ ? devlink_pernet_pre_exit+0xf0/0xf0
+ ? genl_family_rcv_msg_doit.isra.0+0x110/0x110
+ netlink_rcv_skb+0x54/0x100
+ genl_rcv+0x24/0x40
+ netlink_unicast+0x1f6/0x2c0
+ netlink_sendmsg+0x232/0x4a0
+ sock_sendmsg+0x38/0x60
+ ? _copy_from_user+0x2a/0x60
+ __sys_sendto+0x110/0x160
+ ? __count_memcg_events+0x48/0x90
+ ? handle_mm_fault+0x161/0x260
+ ? do_user_addr_fault+0x278/0x6e0
+ __x64_sys_sendto+0x20/0x30
+ do_syscall_64+0x3d/0x90
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+RIP: 0033:0x7fe141b1340a
+Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89
+RSP: 002b:00007fff61d03de8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 0000000000afab00 RCX: 00007fe141b1340a
+RDX: 0000000000000038 RSI: 0000000000afab00 RDI: 0000000000000003
+RBP: 0000000000afa910 R08: 00007fe141d80200 R09: 000000000000000c
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001
+ </TASK>
+
+Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag")
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 1c820119e438..c27df14df145 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5266,6 +5266,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+ {
++ const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_flow_steering *fs;
+ int err;
+@@ -5294,9 +5295,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+
+ mlx5e_health_create_reporters(priv);
++
++ /* If netdev is already registered (e.g. move from uplink to nic profile),
++ * RTNL lock must be held before triggering netdev notifiers.
++ */
++ if (take_rtnl)
++ rtnl_lock();
++
+ /* update XDP supported features */
+ mlx5e_set_xdp_feature(netdev);
+
++ if (take_rtnl)
++ rtnl_unlock();
++
+ return 0;
+ }
+
+--
+2.41.0
+
--- /dev/null
+From ac5da544a3c2047cbfd715acd9cec8380d7fe5c6 Mon Sep 17 00:00:00 2001
+From: Jianbo Liu <jianbol@nvidia.com>
+Date: Fri, 14 Apr 2023 08:48:20 +0000
+Subject: net/mlx5e: TC, Fix internal port memory leak
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+commit ac5da544a3c2047cbfd715acd9cec8380d7fe5c6 upstream.
+
+The flow rule can be split, with the extra post_act rules added to the
+post_act table. It's possible to trigger a memleak when the rule
+forwards packets from an internal port and over a tunnel, in the case
+that, for example, CT 'new' state offload is allowed. The int_port
+object is assigned to the flow attribute of the post_act rule and its
+refcnt is incremented by mlx5e_tc_int_port_get(), but
+mlx5e_tc_int_port_put() is never called, so the refcnt is never
+decremented and int_port is never freed.
+
+The kmemleak reports the following error:
+unreferenced object 0xffff888128204b80 (size 64):
+ comm "handler20", pid 50121, jiffies 4296973009 (age 642.932s)
+ hex dump (first 32 bytes):
+ 01 00 00 00 19 00 00 00 03 f0 00 00 04 00 00 00 ................
+ 98 77 67 41 81 88 ff ff 98 77 67 41 81 88 ff ff .wgA.....wgA....
+ backtrace:
+ [<00000000e992680d>] kmalloc_trace+0x27/0x120
+ [<000000009e945a98>] mlx5e_tc_int_port_get+0x3f3/0xe20 [mlx5_core]
+ [<0000000035a537f0>] mlx5e_tc_add_fdb_flow+0x473/0xcf0 [mlx5_core]
+ [<0000000070c2cec6>] __mlx5e_add_fdb_flow+0x7cf/0xe90 [mlx5_core]
+ [<000000005cc84048>] mlx5e_configure_flower+0xd40/0x4c40 [mlx5_core]
+ [<000000004f8a2031>] mlx5e_rep_indr_offload.isra.0+0x10e/0x1c0 [mlx5_core]
+ [<000000007df797dc>] mlx5e_rep_indr_setup_tc_cb+0x90/0x130 [mlx5_core]
+ [<0000000016c15cc3>] tc_setup_cb_add+0x1cf/0x410
+ [<00000000a63305b4>] fl_hw_replace_filter+0x38f/0x670 [cls_flower]
+ [<000000008bc9e77c>] fl_change+0x1fd5/0x4430 [cls_flower]
+ [<00000000e7f766e4>] tc_new_tfilter+0x867/0x2010
+ [<00000000e101c0ef>] rtnetlink_rcv_msg+0x6fc/0x9f0
+ [<00000000e1111d44>] netlink_rcv_skb+0x12c/0x360
+ [<0000000082dd6c8b>] netlink_unicast+0x438/0x710
+ [<00000000fc568f70>] netlink_sendmsg+0x794/0xc50
+ [<0000000016e92590>] sock_sendmsg+0xc5/0x190
+
+Fix this by moving the int_port cleanup code to the flow attribute
+free helper, which is used by all the attribute free paths.
+
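+A minimal userspace sketch of the refcounting contract involved
+(hypothetical simplified types, not the driver code): every get must
+be paired with a put on the shared free path, including for the extra
+post_act attributes created when the rule is split:
+
+	#include <assert.h>
+
+	struct int_port { int refcnt; };
+
+	static struct int_port *int_port_get(struct int_port *p)
+	{
+		p->refcnt++;	/* taken once per flow attribute */
+		return p;
+	}
+
+	static void int_port_put(struct int_port *p)
+	{
+		p->refcnt--;	/* must run for every attribute */
+	}
+
+	int main(void)
+	{
+		struct int_port port = { 0 };
+		struct int_port *a = int_port_get(&port);	/* main rule attr */
+		struct int_port *b = int_port_get(&port);	/* post_act attr */
+
+		int_port_put(a);
+		int_port_put(b);	/* skipping this put is the leak */
+		assert(port.refcnt == 0);
+		return 0;
+	}
+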
+Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1943,9 +1943,7 @@ static void mlx5e_tc_del_fdb_flow(struct
+ {
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_flow_attr *attr = flow->attr;
+- struct mlx5_esw_flow_attr *esw_attr;
+
+- esw_attr = attr->esw_attr;
+ mlx5e_put_flow_tunnel_id(flow);
+
+ remove_unready_flow(flow);
+@@ -1966,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct
+
+ mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
+
+- if (esw_attr->int_port)
+- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
+-
+- if (esw_attr->dest_int_port)
+- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
+-
+ if (flow_flag_test(flow, L3_TO_L2_DECAP))
+ mlx5e_detach_decap(priv, flow);
+
+@@ -4250,6 +4242,7 @@ static void
+ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+ {
+ struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
++ struct mlx5_esw_flow_attr *esw_attr;
+
+ if (!attr)
+ return;
+@@ -4267,6 +4260,18 @@ mlx5_free_flow_attr_actions(struct mlx5e
+ mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
+ }
+
++ if (mlx5e_is_eswitch_flow(flow)) {
++ esw_attr = attr->esw_attr;
++
++ if (esw_attr->int_port)
++ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
++ esw_attr->int_port);
++
++ if (esw_attr->dest_int_port)
++ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
++ esw_attr->dest_int_port);
++ }
++
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
+
+ free_branch_attr(flow, attr->branch_true);
--- /dev/null
+From 6b5926eb1c034affff3fb44a98cb8c67153847d8 Mon Sep 17 00:00:00 2001
+From: Chris Mi <cmi@nvidia.com>
+Date: Wed, 26 Jul 2023 09:06:33 +0300
+Subject: net/mlx5e: Unoffload post act rule when handling FIB events
+
+From: Chris Mi <cmi@nvidia.com>
+
+commit 6b5926eb1c034affff3fb44a98cb8c67153847d8 upstream.
+
+If the following tc rule is installed on the stack device:
+
+filter parent ffff: protocol ip pref 3 flower chain 1
+filter parent ffff: protocol ip pref 3 flower chain 1 handle 0x1
+ dst_mac 24:25:d0:e1:00:00
+ src_mac 02:25:d0:25:01:02
+ eth_type ipv4
+ ct_state +trk+new
+ in_hw in_hw_count 1
+ action order 1: ct commit zone 0 pipe
+ index 2 ref 1 bind 1 installed 3807 sec used 3779 sec firstused 3800 sec
+ Action statistics:
+ Sent 120 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+ used_hw_stats delayed
+
+ action order 2: tunnel_key set
+ src_ip 192.168.1.25
+ dst_ip 192.168.1.26
+ key_id 4
+ dst_port 4789
+ csum pipe
+ index 3 ref 1 bind 1 installed 3807 sec used 3779 sec firstused 3800 sec
+ Action statistics:
+ Sent 120 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+ used_hw_stats delayed
+
+ action order 3: mirred (Egress Redirect to device vxlan1) stolen
+ index 9 ref 1 bind 1 installed 3807 sec used 3779 sec firstused 3800 sec
+ Action statistics:
+ Sent 120 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+ used_hw_stats delayed
+
+When handling FIB events, the rule in the post act table is not
+deleted. And because the post act rule has packet reformat and modify
+header actions, the following syndromes are also hit:
+
+mlx5_core 0000:08:00.0: mlx5_cmd_out_err:829:(pid 11613): DEALLOC_MODIFY_HEADER_CONTEXT(0x941) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x1ab444), err(-22)
+mlx5_core 0000:08:00.0: mlx5_cmd_out_err:829:(pid 11613): DEALLOC_PACKET_REFORMAT_CONTEXT(0x93e) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x179e84), err(-22)
+
+Fix it by unoffloading the post act rule when handling FIB events.
+
+Fixes: 314e1105831b ("net/mlx5e: Add post act offload/unoffload API")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -1461,10 +1461,12 @@ static void mlx5e_invalidate_encap(struc
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
+
+- if (flow_flag_test(flow, SLOW))
++ if (flow_flag_test(flow, SLOW)) {
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+- else
++ } else {
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
++ mlx5e_tc_unoffload_flow_post_acts(flow);
++ }
+
+ mlx5e_tc_detach_mod_hdr(priv, flow, attr);
+ attr->modify_hdr = NULL;
--- /dev/null
+From d7791cec2304aea22eb2ada944e4d467302f5bfe Mon Sep 17 00:00:00 2001
+From: Li Yang <leoyang.li@nxp.com>
+Date: Wed, 2 Aug 2023 14:13:47 -0500
+Subject: net: phy: at803x: remove set/get wol callbacks for AR8032
+
+From: Li Yang <leoyang.li@nxp.com>
+
+commit d7791cec2304aea22eb2ada944e4d467302f5bfe upstream.
+
+Since the AR8032 part does not support WoL, remove the related
+callbacks from it.
+
+Fixes: 5800091a2061 ("net: phy: at803x: add support for AR8032 PHY")
+Signed-off-by: Li Yang <leoyang.li@nxp.com>
+Cc: David Bauer <mail@david-bauer.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -2086,8 +2086,6 @@ static struct phy_driver at803x_driver[]
+ .flags = PHY_POLL_CABLE_TEST,
+ .config_init = at803x_config_init,
+ .link_change_notify = at803x_link_change_notify,
+- .set_wol = at803x_set_wol,
+- .get_wol = at803x_get_wol,
+ .suspend = at803x_suspend,
+ .resume = at803x_resume,
+ /* PHY_BASIC_FEATURES */
--- /dev/null
+From 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:33 +0300
+Subject: nexthop: Fix infinite nexthop bucket dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop bucket dump callback always returns a positive number if
+nexthop buckets were filled in the provided skb, even if the dump is
+complete. This means that a dump will span at least two recvmsg() calls
+as long as nexthop buckets are present. In the last recvmsg() call the
+dump callback will not fill in any nexthop buckets because the previous
+call indicated that the dump should restart from the last dumped nexthop
+ID plus one.
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396980, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 128
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 6.66 nhid 1
+ id 10 index 1 idle_time 6.66 nhid 1
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # ip nexthop bucket
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ [...]
+
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOPBUCKET responses:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396737, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 148
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 148
+ id 4294967295 index 0 idle_time 6.61 nhid 1
+ id 4294967295 index 1 idle_time 6.61 nhid 1
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and simply reply with a
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
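+The contract can be simulated in plain userspace C (a sketch of the
+core netlink dump loop under the assumptions above, not kernel code):
+the core keeps calling the callback while it returns a positive
+number, and appends NLMSG_DONE once it returns zero:
+
+	#include <stdio.h>
+
+	/* Stand-in for the dump callback: returns the number of bytes
+	 * written to the buffer, or 0 when the dump is complete. */
+	static int dump_cb(unsigned int *next_index, unsigned int n)
+	{
+		if (*next_index >= n)
+			return 0;	/* complete: core appends NLMSG_DONE */
+		printf("bucket %u\n", (*next_index)++);
+		return 64;	/* buffer filled: more may remain */
+	}
+
+	int main(void)
+	{
+		unsigned int next_index = 0;
+
+		/* Before the fix the callback returned a positive number
+		 * even on the final pass, costing one extra round trip. */
+		while (dump_cb(&next_index, 2) > 0)
+			;
+		printf("NLMSG_DONE\n");
+		return 0;
+	}
+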
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump [ OK ]
+ [...]
+
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-4-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c | 6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh | 5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3424,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struc
+
+ if (err < 0) {
+ if (likely(skb->len))
+- goto out;
+- goto out_err;
++ err = skb->len;
+ }
+
+-out:
+- err = skb->len;
+-out_err:
+ cb->seq = net->nexthop.seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -2206,6 +2206,11 @@ basic_res()
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
++ # Dump should not loop endlessly when maximum nexthop ID is configured.
++ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
++ run_cmd "timeout 5 $IP nexthop bucket"
++ log_test $? 0 "Maximum nexthop ID dump"
++
+ #
+ # resilient nexthop buckets get requests
+ #
--- /dev/null
+From 913f60cacda73ccac8eead94983e5884c03e04cd Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:31 +0300
+Subject: nexthop: Fix infinite nexthop dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 913f60cacda73ccac8eead94983e5884c03e04cd upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop dump callback always returns a positive number if nexthops
+were filled in the provided skb, even if the dump is complete. This
+means that a dump will span at least two recvmsg() calls as long as
+nexthops are present. In the last recvmsg() call the dump callback will
+not fill in any nexthops because the previous call indicated that the
+dump should restart from the last dumped nexthop ID plus one.
+
+ # ip nexthop add id 1 blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394315, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 1], {nla_len=4, nla_type=NHA_BLACKHOLE}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
+ id 1 blackhole
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # ip nexthop
+ id 4294967295 blackhole
+ id 4294967295 blackhole
+ [...]
+
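+The endless restart is plain unsigned wrap-around of the resume
+cursor, e.g. (illustrative C only):
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	int main(void)
+	{
+		uint32_t last_dumped = 0xffffffff;	/* maximum nexthop ID */
+		uint32_t resume = last_dumped + 1;	/* wraps to 0 */
+
+		printf("resume from id %u\n", resume);	/* prints 0 */
+		return 0;
+	}
+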
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOP response:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394080, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 56
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 4294967295], {nla_len=4, nla_type=NHA_BLACKHOLE}]], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 56
+ id 4294967295 blackhole
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and simply reply with a
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump [ OK ]
+ [...]
+
+Fixes: ab84be7e54fc ("net: Initial nexthop code")
+Reported-by: Petr Machata <petrm@nvidia.com>
+Closes: https://lore.kernel.org/netdev/87sf91enuf.fsf@nvidia.com/
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-2-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c | 6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh | 5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_bu
+ &rtm_dump_nexthop_cb, &filter);
+ if (err < 0) {
+ if (likely(skb->len))
+- goto out;
+- goto out_err;
++ err = skb->len;
+ }
+
+-out:
+- err = skb->len;
+-out_err:
+ cb->seq = net->nexthop.seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1981,6 +1981,11 @@ basic()
+
+ run_cmd "$IP link set dev lo up"
+
++ # Dump should not loop endlessly when maximum nexthop ID is configured.
++ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
++ run_cmd "timeout 5 $IP nexthop"
++ log_test $? 0 "Maximum nexthop ID dump"
++
+ #
+ # groups
+ #
--- /dev/null
+From f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:32 +0300
+Subject: nexthop: Make nexthop bucket dump more efficient
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 upstream.
+
+rtm_dump_nexthop_bucket_nh() is used to dump nexthop buckets belonging
+to a specific resilient nexthop group. The function returns a positive
+return code (the skb length) upon both success and failure.
+
+The above behavior is problematic. When a complete nexthop bucket dump
+is requested, the function that walks the different nexthops treats the
+non-zero return code as an error. This causes buckets belonging to
+different resilient nexthop groups to be dumped using different buffers
+even if they can all fit in the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 10 index 0 idle_time 10.27 nhid 1
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 20 index 0 idle_time 6.44 nhid 1
+ [...]
+
+Fix by only returning a non-zero return code when an error occurred and
+restarting the dump from the bucket index we failed to fill in. This
+allows buckets belonging to different resilient nexthop groups to be
+dumped using the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 30.21 nhid 1
+ id 20 index 0 idle_time 26.7 nhid 1
+ [...]
+
+While this change is more of a performance improvement than an actual
+bug fix, it is a prerequisite for a subsequent patch that does fix a
+bug.
+
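+The resume pattern after the change, sketched as a generic cursor walk
+(hypothetical userspace C, not the driver code): record the index
+before trying to fill it, propagate the error so the dump restarts
+exactly there, and reset the cursor only once the group is done:
+
+	#include <stdio.h>
+
+	struct ctx { unsigned int bucket_index; };
+
+	static int dump_buckets(struct ctx *ctx, unsigned int n,
+				unsigned int room)
+	{
+		unsigned int used = 0;
+
+		for (unsigned int i = ctx->bucket_index; i < n; i++) {
+			ctx->bucket_index = i;	/* save before filling */
+			if (used == room)
+				return -1;	/* buffer full: resume here */
+			printf("bucket %u\n", i);
+			used++;
+		}
+		ctx->bucket_index = 0;	/* group done: reset the cursor */
+		return 0;	/* lets the walk move to the next group */
+	}
+
+	int main(void)
+	{
+		struct ctx ctx = { 0 };
+
+		while (dump_buckets(&ctx, 8, 5) != 0)
+			printf("resume at %u\n", ctx.bucket_index);
+		return 0;
+	}
+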
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-3-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c | 16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3363,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(st
+ dd->filter.res_bucket_nh_id != nhge->nh->id)
+ continue;
+
++ dd->ctx->bucket_index = bucket_index;
+ err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+ RTM_NEWNEXTHOPBUCKET, portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+- if (err < 0) {
+- if (likely(skb->len))
+- goto out;
+- goto out_err;
+- }
++ if (err)
++ return err;
+ }
+
+ dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+- bucket_index = 0;
++ dd->ctx->bucket_index = 0;
+
+-out:
+- err = skb->len;
+-out_err:
+- dd->ctx->bucket_index = bucket_index;
+- return err;
++ return 0;
+ }
+
+ static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
rdma-bnxt_re-fix-error-handling-in-probe-failure-path.patch
net-tls-avoid-discarding-data-on-record-close.patch
net-marvell-prestera-fix-handling-ipv4-routes-with-nhid.patch
+net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
+net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch
+net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
+net-hns3-add-wait-until-mac-link-down.patch
+net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch
+net-enetc-reimplement-rfs-rss-memory-clearing-as-pci-quirk.patch
+nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
+nexthop-make-nexthop-bucket-dump-more-efficient.patch
+nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
+net-hns3-fix-strscpy-causing-content-truncation-issue.patch
+dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
+dmaengine-idxd-clear-prs-disable-flag-when-disabling-idxd-device.patch
+dmaengine-owl-dma-modify-mismatched-function-name.patch
+net-mlx5e-take-rtnl-lock-when-needed-before-calling-xdp_set_features.patch
+net-mlx5e-tc-fix-internal-port-memory-leak.patch
+net-mlx5-dr-fix-wrong-allocation-of-modify-hdr-pattern.patch
+net-mlx5-allow-0-for-total-host-vfs.patch
+net-mlx5e-unoffload-post-act-rule-when-handling-fib-events.patch
+net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch
+net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
+net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch
+ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
+ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
+ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
+ibmvnic-do-partial-reset-on-login-failure.patch
+ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch