]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 18:53:26 +0000 (20:53 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 18:53:26 +0000 (20:53 +0200)
added patches:
dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
dmaengine-owl-dma-modify-mismatched-function-name.patch
gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch
gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch
ibmvnic-do-partial-reset-on-login-failure.patch
ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch
ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch
net-hns3-add-wait-until-mac-link-down.patch
net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch
net-hns3-fix-strscpy-causing-content-truncation-issue.patch
net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
net-mlx5-allow-0-for-total-host-vfs.patch
net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch
net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch
net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
nexthop-make-nexthop-bucket-dump-more-efficient.patch

23 files changed:
queue-6.1/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch [new file with mode: 0644]
queue-6.1/dmaengine-owl-dma-modify-mismatched-function-name.patch [new file with mode: 0644]
queue-6.1/gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch [new file with mode: 0644]
queue-6.1/gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch [new file with mode: 0644]
queue-6.1/ibmvnic-do-partial-reset-on-login-failure.patch [new file with mode: 0644]
queue-6.1/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch [new file with mode: 0644]
queue-6.1/ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch [new file with mode: 0644]
queue-6.1/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch [new file with mode: 0644]
queue-6.1/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch [new file with mode: 0644]
queue-6.1/net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch [new file with mode: 0644]
queue-6.1/net-hns3-add-wait-until-mac-link-down.patch [new file with mode: 0644]
queue-6.1/net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch [new file with mode: 0644]
queue-6.1/net-hns3-fix-strscpy-causing-content-truncation-issue.patch [new file with mode: 0644]
queue-6.1/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch [new file with mode: 0644]
queue-6.1/net-mlx5-allow-0-for-total-host-vfs.patch [new file with mode: 0644]
queue-6.1/net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch [new file with mode: 0644]
queue-6.1/net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch [new file with mode: 0644]
queue-6.1/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch [new file with mode: 0644]
queue-6.1/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch [new file with mode: 0644]
queue-6.1/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch [new file with mode: 0644]
queue-6.1/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch [new file with mode: 0644]
queue-6.1/nexthop-make-nexthop-bucket-dump-more-efficient.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch b/queue-6.1/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
new file mode 100644 (file)
index 0000000..1e6d960
--- /dev/null
@@ -0,0 +1,61 @@
+From 0a46781c89dece85386885a407244ca26e5c1c44 Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Wed, 12 Jul 2023 18:26:45 +0530
+Subject: dmaengine: mcf-edma: Fix a potential un-allocated memory access
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+commit 0a46781c89dece85386885a407244ca26e5c1c44 upstream.
+
+When 'mcf_edma' is allocated, some space is allocated for a
+flexible array at the end of the struct. 'chans' items are allocated, that is
+to say 'pdata->dma_channels'.
+
+Then, this number of items is stored in 'mcf_edma->n_chans'.
+
+A few lines later, if 'mcf_edma->n_chans' is 0, then a default value of 64
+is set.
+
+This ends up with no space allocated by devm_kzalloc() because chans was 0, but
+64 items are read and/or written in unallocated memory.
+
+Change the logic to define a default value before allocating the memory.
+
+Fixes: e7a3ff92eaf1 ("dmaengine: fsl-edma: add ColdFire mcf5441x edma support")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Link: https://lore.kernel.org/r/f55d914407c900828f6fad3ea5fa791a5f17b9a4.1685172449.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/mcf-edma.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/dma/mcf-edma.c
++++ b/drivers/dma/mcf-edma.c
+@@ -191,7 +191,13 @@ static int mcf_edma_probe(struct platfor
+               return -EINVAL;
+       }
+-      chans = pdata->dma_channels;
++      if (!pdata->dma_channels) {
++              dev_info(&pdev->dev, "setting default channel number to 64");
++              chans = 64;
++      } else {
++              chans = pdata->dma_channels;
++      }
++
+       len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+       mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+       if (!mcf_edma)
+@@ -203,11 +209,6 @@ static int mcf_edma_probe(struct platfor
+       mcf_edma->drvdata = &mcf_data;
+       mcf_edma->big_endian = 1;
+-      if (!mcf_edma->n_chans) {
+-              dev_info(&pdev->dev, "setting default channel number to 64");
+-              mcf_edma->n_chans = 64;
+-      }
+-
+       mutex_init(&mcf_edma->fsl_edma_mutex);
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/queue-6.1/dmaengine-owl-dma-modify-mismatched-function-name.patch b/queue-6.1/dmaengine-owl-dma-modify-mismatched-function-name.patch
new file mode 100644 (file)
index 0000000..3def81b
--- /dev/null
@@ -0,0 +1,34 @@
+From 74d7221c1f9c9f3a8c316a3557ca7dca8b99d14c Mon Sep 17 00:00:00 2001
+From: Zhang Jianhua <chris.zjh@huawei.com>
+Date: Sat, 22 Jul 2023 15:32:44 +0000
+Subject: dmaengine: owl-dma: Modify mismatched function name
+
+From: Zhang Jianhua <chris.zjh@huawei.com>
+
+commit 74d7221c1f9c9f3a8c316a3557ca7dca8b99d14c upstream.
+
+No functional modification involved.
+
+drivers/dma/owl-dma.c:208: warning: expecting prototype for struct owl_dma_pchan. Prototype was for struct owl_dma_vchan instead
+
+Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver")
+Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Link: https://lore.kernel.org/r/20230722153244.2086949-1-chris.zjh@huawei.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/owl-dma.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/dma/owl-dma.c
++++ b/drivers/dma/owl-dma.c
+@@ -192,7 +192,7 @@ struct owl_dma_pchan {
+ };
+ /**
+- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
++ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
+  * @vc: wrapped virtual channel
+  * @pchan: the physical channel utilized by this channel
+  * @txd: active transaction on this channel
diff --git a/queue-6.1/gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch b/queue-6.1/gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch
new file mode 100644 (file)
index 0000000..69b9b61
--- /dev/null
@@ -0,0 +1,32 @@
+From 5a78d5db9c90c9dc84212f40a5f2687b7cafc8ec Mon Sep 17 00:00:00 2001
+From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Date: Tue, 1 Aug 2023 21:09:51 +0200
+Subject: gpio: sim: mark the GPIO chip as a one that can sleep
+
+From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+
+commit 5a78d5db9c90c9dc84212f40a5f2687b7cafc8ec upstream.
+
+Simulated chips use a mutex for synchronization in driver callbacks so
+they must not be called from interrupt context. Set the can_sleep field
+of the GPIO chip to true to force users to only use threaded irqs.
+
+Fixes: cb8c474e79be ("gpio: sim: new testing module")
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpio/gpio-sim.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpio/gpio-sim.c
++++ b/drivers/gpio/gpio-sim.c
+@@ -425,6 +425,7 @@ static int gpio_sim_add_bank(struct fwno
+       gc->set_config = gpio_sim_set_config;
+       gc->to_irq = gpio_sim_to_irq;
+       gc->free = gpio_sim_free;
++      gc->can_sleep = true;
+       ret = devm_gpiochip_add_data(dev, gc, chip);
+       if (ret)
diff --git a/queue-6.1/gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch b/queue-6.1/gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch
new file mode 100644 (file)
index 0000000..36f001f
--- /dev/null
@@ -0,0 +1,38 @@
+From 33f83d13ded164cd49ce2a3bd2770115abc64e6f Mon Sep 17 00:00:00 2001
+From: William Breathitt Gray <william.gray@linaro.org>
+Date: Thu, 10 Aug 2023 18:00:44 -0400
+Subject: gpio: ws16c48: Fix off-by-one error in WS16C48 resource region extent
+
+From: William Breathitt Gray <william.gray@linaro.org>
+
+commit 33f83d13ded164cd49ce2a3bd2770115abc64e6f upstream.
+
+The WinSystems WS16C48 I/O address region spans offsets 0x0 through 0xA,
+which is a total of 11 bytes. Fix the WS16C48_EXTENT define to the
+correct value of 11 so that access to necessary device registers is
+properly requested in the ws16c48_probe() callback by the
+devm_request_region() function call.
+
+Fixes: 2c05a0f29f41 ("gpio: ws16c48: Implement and utilize register structures")
+Cc: stable@vger.kernel.org
+Cc: Paul Demetrotion <pdemetrotion@winsystems.com>
+Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpio/gpio-ws16c48.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpio/gpio-ws16c48.c
++++ b/drivers/gpio/gpio-ws16c48.c
+@@ -18,7 +18,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/types.h>
+-#define WS16C48_EXTENT 10
++#define WS16C48_EXTENT 11
+ #define MAX_NUM_WS16C48 max_num_isa_dev(WS16C48_EXTENT)
+ static unsigned int base[MAX_NUM_WS16C48];
diff --git a/queue-6.1/ibmvnic-do-partial-reset-on-login-failure.patch b/queue-6.1/ibmvnic-do-partial-reset-on-login-failure.patch
new file mode 100644 (file)
index 0000000..1d70eb6
--- /dev/null
@@ -0,0 +1,113 @@
+From 23cc5f667453ca7645a24c8d21bf84dbf61107b2 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:37 -0500
+Subject: ibmvnic: Do partial reset on login failure
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 23cc5f667453ca7645a24c8d21bf84dbf61107b2 upstream.
+
+Perform a partial reset before sending a login request if any of the
+following are true:
+ 1. If a previous request times out. This can be dangerous because the
+       VIOS could still receive the old login request at any point after
+       the timeout. Therefore, it is best to re-register the CRQ's  and
+       sub-CRQ's before retrying.
+ 2. If the previous request returns an error that is not described in
+       PAPR. PAPR provides procedures if the login returns with partial
+       success or aborted return codes (section L.5.1) but other values
+       do not have a defined procedure. Previously, these conditions
+       just returned error from the login function rather than trying
+       to resolve the issue.
+       This can cause further issues since most callers of the login
+       function are not prepared to handle an error when logging in. This
+       improper cleanup can lead to the device being permanently DOWN'd.
+       For example, if the VIOS believes that the device is already logged
+       in then it will return INVALID_STATE (-7). If we never re-register
+       CRQ's then it will always think that the device is already logged
+       in. This leaves the device inoperable.
+
+The partial reset involves freeing the sub-CRQs, freeing the CRQ then
+registering and initializing a new CRQ and sub-CRQs. This essentially
+restarts all communication with VIOS to allow for a fresh login attempt
+that will be unhindered by any previous failed attempts.
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-4-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   46 ++++++++++++++++++++++++++++++++-----
+ 1 file changed, 40 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -96,6 +96,8 @@ static int pending_scrq(struct ibmvnic_a
+ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
+                                       struct ibmvnic_sub_crq_queue *);
+ static int ibmvnic_poll(struct napi_struct *napi, int data);
++static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
++static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
+ static void send_query_map(struct ibmvnic_adapter *adapter);
+ static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
+ static int send_request_unmap(struct ibmvnic_adapter *, u8);
+@@ -1336,11 +1338,9 @@ static int ibmvnic_login(struct net_devi
+               if (!wait_for_completion_timeout(&adapter->init_done,
+                                                timeout)) {
+-                      netdev_warn(netdev, "Login timed out, retrying...\n");
+-                      retry = true;
+-                      adapter->init_done_rc = 0;
+-                      retry_count++;
+-                      continue;
++                      netdev_warn(netdev, "Login timed out\n");
++                      adapter->login_pending = false;
++                      goto partial_reset;
+               }
+               if (adapter->init_done_rc == ABORTED) {
+@@ -1385,7 +1385,41 @@ static int ibmvnic_login(struct net_devi
+               } else if (adapter->init_done_rc) {
+                       netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+                                   adapter->init_done_rc);
+-                      return -EIO;
++
++partial_reset:
++                      /* adapter login failed, so free any CRQs or sub-CRQs
++                       * and register again before attempting to login again.
++                       * If we don't do this then the VIOS may think that
++                       * we are already logged in and reject any subsequent
++                       * attempts
++                       */
++                      netdev_warn(netdev,
++                                  "Freeing and re-registering CRQs before attempting to login again\n");
++                      retry = true;
++                      adapter->init_done_rc = 0;
++                      retry_count++;
++                      release_sub_crqs(adapter, true);
++                      reinit_init_done(adapter);
++                      release_crq_queue(adapter);
++                      /* If we don't sleep here then we risk an unnecessary
++                       * failover event from the VIOS. This is a known VIOS
++                       * issue caused by a vnic device freeing and registering
++                       * a CRQ too quickly.
++                       */
++                      msleep(1500);
++                      rc = init_crq_queue(adapter);
++                      if (rc) {
++                              netdev_err(netdev, "login recovery: init CRQ failed %d\n",
++                                         rc);
++                              return -EIO;
++                      }
++
++                      rc = ibmvnic_reset_init(adapter, false);
++                      if (rc) {
++                              netdev_err(netdev, "login recovery: Reset init failed %d\n",
++                                         rc);
++                              return -EIO;
++                      }
+               }
+       } while (retry);
diff --git a/queue-6.1/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch b/queue-6.1/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
new file mode 100644 (file)
index 0000000..2a698e7
--- /dev/null
@@ -0,0 +1,77 @@
+From db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:34 -0500
+Subject: ibmvnic: Enforce stronger sanity checks on login response
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 upstream.
+
+Ensure that all offsets in a login response buffer are within the size
+of the allocated response buffer. Any offsets or lengths that surpass
+the allocation are likely the result of an incomplete response buffer.
+In these cases, a full reset is necessary.
+
+When attempting to login, the ibmvnic device will allocate a response
+buffer and pass a reference to the VIOS. The VIOS will then send the
+ibmvnic device a LOGIN_RSP CRQ to signal that the buffer has been filled
+with data. If the ibmvnic device does not get a response in 20 seconds,
+the old buffer is freed and a new login request is sent. With 2
+outstanding requests, any LOGIN_RSP CRQ's could be for the older
+login request. If this is the case then the login response buffer (which
+is for the newer login request) could be incomplete and contain invalid
+data. Therefore, we must enforce strict sanity checks on the response
+buffer values.
+
+Testing has shown that the `off_rxadd_buff_size` value is filled in last
+by the VIOS and will be the smoking gun for these circumstances.
+
+Until VIOS can implement a mechanism for tracking outstanding response
+buffers and a method for mapping a LOGIN_RSP CRQ to a particular login
+response buffer, the best ibmvnic can do in this situation is perform a
+full reset.
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-1-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -5192,6 +5192,7 @@ static int handle_login_rsp(union ibmvni
+       int num_tx_pools;
+       int num_rx_pools;
+       u64 *size_array;
++      u32 rsp_len;
+       int i;
+       /* CHECK: Test/set of login_pending does not need to be atomic
+@@ -5243,6 +5244,23 @@ static int handle_login_rsp(union ibmvni
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+               return -EIO;
+       }
++
++      rsp_len = be32_to_cpu(login_rsp->len);
++      if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
++          rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
++              /* This can happen if a login request times out and there are
++               * 2 outstanding login requests sent, the LOGIN_RSP crq
++               * could have been for the older login request. So we are
++               * parsing the newer response buffer which may be incomplete
++               */
++              dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
++              ibmvnic_reset(adapter, VNIC_RESET_FATAL);
++              return -EIO;
++      }
++
+       size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+               be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+       /* variable buffer sizes are not supported, so just read the
diff --git a/queue-6.1/ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch b/queue-6.1/ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch
new file mode 100644 (file)
index 0000000..c5dc550
--- /dev/null
@@ -0,0 +1,145 @@
+From 6db541ae279bd4e76dbd939e5fbf298396166242 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:38 -0500
+Subject: ibmvnic: Ensure login failure recovery is safe from other resets
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 6db541ae279bd4e76dbd939e5fbf298396166242 upstream.
+
+If a login request fails, the recovery process should be protected
+against parallel resets. It is a known issue that freeing and
+registering CRQ's in quick succession can result in a failover CRQ from
+the VIOS. Processing a failover during login recovery is dangerous for
+two reasons:
+ 1. This will result in two parallel initialization processes, which can
+ cause serious issues during login.
+ 2. It is possible that the failover CRQ is received but never executed.
+ We get notified of a pending failover through a transport event CRQ.
+ The reset is not performed until an INIT CRQ request is received.
+ Previously, if CRQ init fails during login recovery, then the ibmvnic
+ irq is freed and the login process returned error. If failover_pending
+ is true (a transport event was received), then the ibmvnic device
+ would never be able to process the reset since it cannot receive the
+ CRQ_INIT request due to the irq being freed. This left the device
+ in an inoperable state.
+
+Therefore, the login failure recovery process must be hardened against
+these possible issues. Possible failovers (due to quick CRQ free and
+init) must be avoided and any issues during re-initialization should be
+dealt with instead of being propagated up the stack. This logic is
+similar to that of ibmvnic_probe().
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-5-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   70 +++++++++++++++++++++++++------------
+ 1 file changed, 48 insertions(+), 22 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -115,6 +115,7 @@ static void ibmvnic_tx_scrq_clean_buffer
+ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
+                               struct ibmvnic_long_term_buff *ltb);
+ static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
++static void flush_reset_queue(struct ibmvnic_adapter *adapter);
+ struct ibmvnic_stat {
+       char name[ETH_GSTRING_LEN];
+@@ -1316,8 +1317,8 @@ static const char *adapter_state_to_stri
+ static int ibmvnic_login(struct net_device *netdev)
+ {
++      unsigned long flags, timeout = msecs_to_jiffies(20000);
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+-      unsigned long timeout = msecs_to_jiffies(20000);
+       int retry_count = 0;
+       int retries = 10;
+       bool retry;
+@@ -1382,6 +1383,7 @@ static int ibmvnic_login(struct net_devi
+                                           "SCRQ irq initialization failed\n");
+                               return rc;
+                       }
++              /* Default/timeout error handling, reset and start fresh */
+               } else if (adapter->init_done_rc) {
+                       netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+                                   adapter->init_done_rc);
+@@ -1397,29 +1399,53 @@ partial_reset:
+                                   "Freeing and re-registering CRQs before attempting to login again\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+-                      retry_count++;
+                       release_sub_crqs(adapter, true);
+-                      reinit_init_done(adapter);
+-                      release_crq_queue(adapter);
+-                      /* If we don't sleep here then we risk an unnecessary
+-                       * failover event from the VIOS. This is a known VIOS
+-                       * issue caused by a vnic device freeing and registering
+-                       * a CRQ too quickly.
++                      /* Much of this is similar logic as ibmvnic_probe(),
++                       * we are essentially re-initializing communication
++                       * with the server. We really should not run any
++                       * resets/failovers here because this is already a form
++                       * of reset and we do not want parallel resets occurring
+                        */
+-                      msleep(1500);
+-                      rc = init_crq_queue(adapter);
+-                      if (rc) {
+-                              netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+-                                         rc);
+-                              return -EIO;
+-                      }
+-
+-                      rc = ibmvnic_reset_init(adapter, false);
+-                      if (rc) {
+-                              netdev_err(netdev, "login recovery: Reset init failed %d\n",
+-                                         rc);
+-                              return -EIO;
+-                      }
++                      do {
++                              reinit_init_done(adapter);
++                              /* Clear any failovers we got in the previous
++                               * pass since we are re-initializing the CRQ
++                               */
++                              adapter->failover_pending = false;
++                              release_crq_queue(adapter);
++                              /* If we don't sleep here then we risk an
++                               * unnecessary failover event from the VIOS.
++                               * This is a known VIOS issue caused by a vnic
++                               * device freeing and registering a CRQ too
++                               * quickly.
++                               */
++                              msleep(1500);
++                              /* Avoid any resets, since we are currently
++                               * resetting.
++                               */
++                              spin_lock_irqsave(&adapter->rwi_lock, flags);
++                              flush_reset_queue(adapter);
++                              spin_unlock_irqrestore(&adapter->rwi_lock,
++                                                     flags);
++
++                              rc = init_crq_queue(adapter);
++                              if (rc) {
++                                      netdev_err(netdev, "login recovery: init CRQ failed %d\n",
++                                                 rc);
++                                      return -EIO;
++                              }
++
++                              rc = ibmvnic_reset_init(adapter, false);
++                              if (rc)
++                                      netdev_err(netdev, "login recovery: Reset init failed %d\n",
++                                                 rc);
++                              /* IBMVNIC_CRQ_INIT will return EAGAIN if it
++                               * fails, since ibmvnic_reset_init will free
++                               * irq's in failure, we won't be able to receive
++                               * new CRQs so we need to keep trying. probe()
++                               * handles this similarly.
++                               */
++                      } while (rc == -EAGAIN && retry_count++ < retries);
+               }
+       } while (retry);
diff --git a/queue-6.1/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch b/queue-6.1/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
new file mode 100644 (file)
index 0000000..1f9320e
--- /dev/null
@@ -0,0 +1,73 @@
+From d78a671eb8996af19d6311ecdee9790d2fa479f0 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:36 -0500
+Subject: ibmvnic: Handle DMA unmapping of login buffs in release functions
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit d78a671eb8996af19d6311ecdee9790d2fa479f0 upstream.
+
+Rather than leaving the DMA unmapping of the login buffers to the
+login response handler, move this work into the login release functions.
+Previously, these functions were only used for freeing the allocated
+buffers. This could lead to issues if there is more than one
+outstanding login buffer request, which is possible if a login request
+times out.
+
+If a login request times out, then there is another call to send login.
+The send login function makes a call to the login buffer release
+function. In the past, this freed the buffers but did not DMA unmap.
+Therefore, the VIOS could still write to the old login (now freed)
+buffer. It is for this reason that it is a good idea to leave the DMA
+unmap call to the login buffers release function.
+
+Since the login buffer release functions now handle DMA unmapping,
+remove the duplicate DMA unmapping in handle_login_rsp().
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-3-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -1397,12 +1397,22 @@ static int ibmvnic_login(struct net_devi
+ static void release_login_buffer(struct ibmvnic_adapter *adapter)
+ {
++      if (!adapter->login_buf)
++              return;
++
++      dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
++                       adapter->login_buf_sz, DMA_TO_DEVICE);
+       kfree(adapter->login_buf);
+       adapter->login_buf = NULL;
+ }
+ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+ {
++      if (!adapter->login_rsp_buf)
++              return;
++
++      dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
++                       adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+       kfree(adapter->login_rsp_buf);
+       adapter->login_rsp_buf = NULL;
+ }
+@@ -5207,11 +5217,6 @@ static int handle_login_rsp(union ibmvni
+       }
+       adapter->login_pending = false;
+-      dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
+-                       DMA_TO_DEVICE);
+-      dma_unmap_single(dev, adapter->login_rsp_buf_token,
+-                       adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+-
+       /* If the number of queues requested can't be allocated by the
+        * server, the login response will return with code 1. We will need
+        * to resend the login buffer with fewer queues requested.
diff --git a/queue-6.1/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch b/queue-6.1/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
new file mode 100644 (file)
index 0000000..11d5135
--- /dev/null
@@ -0,0 +1,41 @@
+From 411c565b4bc63e9584a8493882bd566e35a90588 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:35 -0500
+Subject: ibmvnic: Unmap DMA login rsp buffer on send login fail
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 411c565b4bc63e9584a8493882bd566e35a90588 upstream.
+
+If the LOGIN CRQ fails to send then we must DMA unmap the response
+buffer. Previously, if the CRQ failed then the memory was freed without
+DMA unmapping.
+
+Fixes: c98d9cc4170d ("ibmvnic: send_login should check for crq errors")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-2-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -4626,11 +4626,14 @@ static int send_login(struct ibmvnic_ada
+       if (rc) {
+               adapter->login_pending = false;
+               netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+-              goto buf_rsp_map_failed;
++              goto buf_send_failed;
+       }
+       return 0;
++buf_send_failed:
++      dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
++                       DMA_FROM_DEVICE);
+ buf_rsp_map_failed:
+       kfree(login_rsp_buffer);
+       adapter->login_rsp_buf = NULL;
diff --git a/queue-6.1/net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch b/queue-6.1/net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch
new file mode 100644 (file)
index 0000000..2d968a2
--- /dev/null
@@ -0,0 +1,91 @@
+From a94c16a2fda010866b8858a386a8bfbeba4f72c5 Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Thu, 3 Aug 2023 16:42:53 +0300
+Subject: net: dsa: ocelot: call dsa_tag_8021q_unregister() under rtnl_lock() on driver remove
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+commit a94c16a2fda010866b8858a386a8bfbeba4f72c5 upstream.
+
+When the tagging protocol in current use is "ocelot-8021q" and we unbind
+the driver, we see this splat:
+
+$ echo '0000:00:00.2' > /sys/bus/pci/drivers/fsl_enetc/unbind
+mscc_felix 0000:00:00.5 swp0: left promiscuous mode
+sja1105 spi2.0: Link is Down
+DSA: tree 1 torn down
+mscc_felix 0000:00:00.5 swp2: left promiscuous mode
+sja1105 spi2.2: Link is Down
+DSA: tree 3 torn down
+fsl_enetc 0000:00:00.2 eno2: left promiscuous mode
+mscc_felix 0000:00:00.5: Link is Down
+------------[ cut here ]------------
+RTNL: assertion failed at net/dsa/tag_8021q.c (409)
+WARNING: CPU: 1 PID: 329 at net/dsa/tag_8021q.c:409 dsa_tag_8021q_unregister+0x12c/0x1a0
+Modules linked in:
+CPU: 1 PID: 329 Comm: bash Not tainted 6.5.0-rc3+ #771
+pc : dsa_tag_8021q_unregister+0x12c/0x1a0
+lr : dsa_tag_8021q_unregister+0x12c/0x1a0
+Call trace:
+ dsa_tag_8021q_unregister+0x12c/0x1a0
+ felix_tag_8021q_teardown+0x130/0x150
+ felix_teardown+0x3c/0xd8
+ dsa_tree_teardown_switches+0xbc/0xe0
+ dsa_unregister_switch+0x168/0x260
+ felix_pci_remove+0x30/0x60
+ pci_device_remove+0x4c/0x100
+ device_release_driver_internal+0x188/0x288
+ device_links_unbind_consumers+0xfc/0x138
+ device_release_driver_internal+0xe0/0x288
+ device_driver_detach+0x24/0x38
+ unbind_store+0xd8/0x108
+ drv_attr_store+0x30/0x50
+---[ end trace 0000000000000000 ]---
+------------[ cut here ]------------
+RTNL: assertion failed at net/8021q/vlan_core.c (376)
+WARNING: CPU: 1 PID: 329 at net/8021q/vlan_core.c:376 vlan_vid_del+0x1b8/0x1f0
+CPU: 1 PID: 329 Comm: bash Tainted: G        W          6.5.0-rc3+ #771
+pc : vlan_vid_del+0x1b8/0x1f0
+lr : vlan_vid_del+0x1b8/0x1f0
+ dsa_tag_8021q_unregister+0x8c/0x1a0
+ felix_tag_8021q_teardown+0x130/0x150
+ felix_teardown+0x3c/0xd8
+ dsa_tree_teardown_switches+0xbc/0xe0
+ dsa_unregister_switch+0x168/0x260
+ felix_pci_remove+0x30/0x60
+ pci_device_remove+0x4c/0x100
+ device_release_driver_internal+0x188/0x288
+ device_links_unbind_consumers+0xfc/0x138
+ device_release_driver_internal+0xe0/0x288
+ device_driver_detach+0x24/0x38
+ unbind_store+0xd8/0x108
+ drv_attr_store+0x30/0x50
+DSA: tree 0 torn down
+
+This was somewhat not so easy to spot, because "ocelot-8021q" is not the
+default tagging protocol, and thus, not everyone who tests the unbinding
+path may have switched to it beforehand. The default
+felix_tag_npi_teardown() does not require rtnl_lock() to be held.
+
+Fixes: 7c83a7c539ab ("net: dsa: add a second tagger for Ocelot switches based on tag_8021q")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20230803134253.2711124-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/ocelot/felix.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/dsa/ocelot/felix.c
++++ b/drivers/net/dsa/ocelot/felix.c
+@@ -1606,8 +1606,10 @@ static void felix_teardown(struct dsa_sw
+       struct felix *felix = ocelot_to_felix(ocelot);
+       struct dsa_port *dp;
++      rtnl_lock();
+       if (felix->tag_proto_ops)
+               felix->tag_proto_ops->teardown(ds);
++      rtnl_unlock();
+       dsa_switch_for_each_available_port(dp, ds)
+               ocelot_deinit_port(ocelot, dp->index);
diff --git a/queue-6.1/net-hns3-add-wait-until-mac-link-down.patch b/queue-6.1/net-hns3-add-wait-until-mac-link-down.patch
new file mode 100644 (file)
index 0000000..d00cdc8
--- /dev/null
@@ -0,0 +1,54 @@
+From 6265e242f7b95f2c1195b42ec912b84ad161470e Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:51 +0800
+Subject: net: hns3: add wait until mac link down
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 6265e242f7b95f2c1195b42ec912b84ad161470e upstream.
+
+In some configuration flows of the hns3 driver, for example changing the
+MTU, the driver disables the MAC through firmware before configuration. But
+firmware disables the MAC asynchronously, so rx traffic may not have stopped.
+
+So fix it by waiting until the MAC link is down.
+
+Fixes: a9775bb64aa7 ("net: hns3: fix set and get link ksettings issue")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-4-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -7569,6 +7569,8 @@ static void hclge_enable_fd(struct hnae3
+ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+ {
++#define HCLGE_LINK_STATUS_WAIT_CNT  3
++
+       struct hclge_desc desc;
+       struct hclge_config_mac_mode_cmd *req =
+               (struct hclge_config_mac_mode_cmd *)desc.data;
+@@ -7593,9 +7595,15 @@ static void hclge_cfg_mac_mode(struct hc
+       req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+-      if (ret)
++      if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "mac enable fail, ret =%d.\n", ret);
++              return;
++      }
++
++      if (!enable)
++              hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
++                                         HCLGE_LINK_STATUS_WAIT_CNT);
+ }
+ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
diff --git a/queue-6.1/net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch b/queue-6.1/net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch
new file mode 100644 (file)
index 0000000..422f7ad
--- /dev/null
@@ -0,0 +1,89 @@
+From ac6257a3ae5db5193b1f19c268e4f72d274ddb88 Mon Sep 17 00:00:00 2001
+From: Yonglong Liu <liuyonglong@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:52 +0800
+Subject: net: hns3: fix deadlock issue when externel_lb and reset are executed together
+
+From: Yonglong Liu <liuyonglong@huawei.com>
+
+commit ac6257a3ae5db5193b1f19c268e4f72d274ddb88 upstream.
+
+When externel_lb and reset are executed together, a deadlock may
+occur:
+[ 3147.217009] INFO: task kworker/u321:0:7 blocked for more than 120 seconds.
+[ 3147.230483] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[ 3147.238999] task:kworker/u321:0  state:D stack:    0 pid:    7 ppid:     2 flags:0x00000008
+[ 3147.248045] Workqueue: hclge hclge_service_task [hclge]
+[ 3147.253957] Call trace:
+[ 3147.257093]  __switch_to+0x7c/0xbc
+[ 3147.261183]  __schedule+0x338/0x6f0
+[ 3147.265357]  schedule+0x50/0xe0
+[ 3147.269185]  schedule_preempt_disabled+0x18/0x24
+[ 3147.274488]  __mutex_lock.constprop.0+0x1d4/0x5dc
+[ 3147.279880]  __mutex_lock_slowpath+0x1c/0x30
+[ 3147.284839]  mutex_lock+0x50/0x60
+[ 3147.288841]  rtnl_lock+0x20/0x2c
+[ 3147.292759]  hclge_reset_prepare+0x68/0x90 [hclge]
+[ 3147.298239]  hclge_reset_subtask+0x88/0xe0 [hclge]
+[ 3147.303718]  hclge_reset_service_task+0x84/0x120 [hclge]
+[ 3147.309718]  hclge_service_task+0x2c/0x70 [hclge]
+[ 3147.315109]  process_one_work+0x1d0/0x490
+[ 3147.319805]  worker_thread+0x158/0x3d0
+[ 3147.324240]  kthread+0x108/0x13c
+[ 3147.328154]  ret_from_fork+0x10/0x18
+
+In the externel_lb process, the hns3 driver calls napi_disable()
+first. If a reset then happens, the restore step of externel_lb
+fails and does not call napi_enable(). When externel_lb is run
+again, napi_disable() is called a second time, which causes a
+deadlock on rtnl_lock().
+
+This patch uses the HNS3_NIC_STATE_DOWN state to protect the
+calls to napi_disable() and napi_enable() in the externel_lb
+process, just as is done in ndo_stop() and ndo_start().
+
+Fixes: 04b6ba143521 ("net: hns3: add support for external loopback test")
+Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-5-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index 9f6890059666..b7b51e56b030 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
+       if (!if_running)
+               return;
++      if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++              return;
++
+       netif_carrier_off(ndev);
+       netif_tx_disable(ndev);
+@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
+       if (!if_running)
+               return;
+-      hns3_nic_reset_all_ring(priv->ae_handle);
++      if (hns3_nic_resetting(ndev))
++              return;
++
++      if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
++              return;
++
++      if (hns3_nic_reset_all_ring(priv->ae_handle))
++              return;
++
++      clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+       for (i = 0; i < priv->vector_num; i++)
+               hns3_vector_enable(&priv->tqp_vector[i]);
+-- 
+2.41.0
+
diff --git a/queue-6.1/net-hns3-fix-strscpy-causing-content-truncation-issue.patch b/queue-6.1/net-hns3-fix-strscpy-causing-content-truncation-issue.patch
new file mode 100644 (file)
index 0000000..57c20f6
--- /dev/null
@@ -0,0 +1,68 @@
+From 5e3d20617b055e725e785e0058426368269949f3 Mon Sep 17 00:00:00 2001
+From: Hao Chen <chenhao418@huawei.com>
+Date: Wed, 9 Aug 2023 10:09:02 +0800
+Subject: net: hns3: fix strscpy causing content truncation issue
+
+From: Hao Chen <chenhao418@huawei.com>
+
+commit 5e3d20617b055e725e785e0058426368269949f3 upstream.
+
+hns3_dbg_fill_content()/hclge_dbg_fill_content() aims to integrate some
+items into a string for content, and we add '\n' and '\0' in the last
+two bytes of content.
+
+strscpy() will add '\0' in the last byte of the destination buffer (one
+of the items), which ends the content print early and truncates some of
+the dump content.
+
+One Error log shows as below:
+cat mac_list/uc
+UC MAC_LIST:
+
+Expected:
+UC MAC_LIST:
+FUNC_ID  MAC_ADDR            STATE
+pf       00:2b:19:05:03:00   ACTIVE
+
+The destination buffer is length-bounded and not required to be
+NUL-terminated, so just change strscpy() to memcpy() to fix it.
+
+Fixes: 1cf3d5567f27 ("net: hns3: fix strncpy() not using dest-buf length as length issue")
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Link: https://lore.kernel.org/r/20230809020902.1941471-1-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c         |    4 ++--
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -458,9 +458,9 @@ static void hns3_dbg_fill_content(char *
+               if (result) {
+                       if (item_len < strlen(result[i]))
+                               break;
+-                      strscpy(pos, result[i], strlen(result[i]));
++                      memcpy(pos, result[i], strlen(result[i]));
+               } else {
+-                      strscpy(pos, items[i].name, strlen(items[i].name));
++                      memcpy(pos, items[i].name, strlen(items[i].name));
+               }
+               pos += item_len;
+               len -= item_len;
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -110,9 +110,9 @@ static void hclge_dbg_fill_content(char
+               if (result) {
+                       if (item_len < strlen(result[i]))
+                               break;
+-                      strscpy(pos, result[i], strlen(result[i]));
++                      memcpy(pos, result[i], strlen(result[i]));
+               } else {
+-                      strscpy(pos, items[i].name, strlen(items[i].name));
++                      memcpy(pos, items[i].name, strlen(items[i].name));
+               }
+               pos += item_len;
+               len -= item_len;
diff --git a/queue-6.1/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch b/queue-6.1/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
new file mode 100644 (file)
index 0000000..faf5ed5
--- /dev/null
@@ -0,0 +1,73 @@
+From 08469dacfad25428b66549716811807203744f4f Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:50 +0800
+Subject: net: hns3: refactor hclge_mac_link_status_wait for interface reuse
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 08469dacfad25428b66549716811807203744f4f upstream.
+
+Some NIC configurations can only be performed after the link is down. So
+this patch refactors this API for reuse.
+
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-3-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struc
+ static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+ static void hclge_sync_fd_table(struct hclge_dev *hdev);
+ static void hclge_update_fec_stats(struct hclge_dev *hdev);
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++                                    int wait_cnt);
+ static struct hnae3_ae_algo ae_algo;
+@@ -7656,10 +7658,9 @@ static void hclge_phy_link_status_wait(s
+       } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
+ }
+-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++                                    int wait_cnt)
+ {
+-#define HCLGE_MAC_LINK_STATUS_NUM  100
+-
+       int link_status;
+       int i = 0;
+       int ret;
+@@ -7672,13 +7673,15 @@ static int hclge_mac_link_status_wait(st
+                       return 0;
+               msleep(HCLGE_LINK_STATUS_MS);
+-      } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
++      } while (++i < wait_cnt);
+       return -EBUSY;
+ }
+ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+                                         bool is_phy)
+ {
++#define HCLGE_MAC_LINK_STATUS_NUM  100
++
+       int link_ret;
+       link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
+@@ -7686,7 +7689,8 @@ static int hclge_mac_phy_link_status_wai
+       if (is_phy)
+               hclge_phy_link_status_wait(hdev, link_ret);
+-      return hclge_mac_link_status_wait(hdev, link_ret);
++      return hclge_mac_link_status_wait(hdev, link_ret,
++                                        HCLGE_MAC_LINK_STATUS_NUM);
+ }
+ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
diff --git a/queue-6.1/net-mlx5-allow-0-for-total-host-vfs.patch b/queue-6.1/net-mlx5-allow-0-for-total-host-vfs.patch
new file mode 100644 (file)
index 0000000..40a7816
--- /dev/null
@@ -0,0 +1,33 @@
+From 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f Mon Sep 17 00:00:00 2001
+From: Daniel Jurgens <danielj@nvidia.com>
+Date: Tue, 11 Jul 2023 00:28:10 +0300
+Subject: net/mlx5: Allow 0 for total host VFs
+
+From: Daniel Jurgens <danielj@nvidia.com>
+
+commit 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f upstream.
+
+When querying eswitch functions, 0 is a valid number of host VFs. After
+introducing ARM SRIOV, falling through to getting the max value from PCI
+results in using the total VFs allowed on the ARM for the host.
+
+Fixes: 86eec50beaf3 ("net/mlx5: Support querying max VFs from device")
+Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/sriov.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+@@ -264,8 +264,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_
+               host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+                                         host_params_context.host_total_vfs);
+               kvfree(out);
+-              if (host_total_vfs)
+-                      return host_total_vfs;
++              return host_total_vfs;
+       }
+ done:
diff --git a/queue-6.1/net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch b/queue-6.1/net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch
new file mode 100644 (file)
index 0000000..cb93947
--- /dev/null
@@ -0,0 +1,33 @@
+From 86ed7b773c01ba71617538b3b107c33fd9cf90b8 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Sun, 30 Jul 2023 09:26:27 +0300
+Subject: net/mlx5: LAG, Check correct bucket when modifying LAG
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 86ed7b773c01ba71617538b3b107c33fd9cf90b8 upstream.
+
+The cited patch introduced buckets in hash mode, but failed to update
+the ports/bucket check when modifying LAG.
+Fix the check.
+
+Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_de
+       for (i = 0; i < ldev->ports; i++) {
+               for (j = 0; j < ldev->buckets; j++) {
+                       idx = i * ldev->buckets + j;
+-                      if (ldev->v2p_map[i] == ports[i])
++                      if (ldev->v2p_map[idx] == ports[idx])
+                               continue;
+                       dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
diff --git a/queue-6.1/net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch b/queue-6.1/net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch
new file mode 100644 (file)
index 0000000..f1ea9f2
--- /dev/null
@@ -0,0 +1,31 @@
+From aab8e1a200b926147db51e3f82fd07bb9edf6a98 Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Sun, 23 Jul 2023 11:03:01 +0300
+Subject: net/mlx5: Reload auxiliary devices in pci error handlers
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit aab8e1a200b926147db51e3f82fd07bb9edf6a98 upstream.
+
+Handling PCI errors should fully tear down and reload auxiliary
+devices, same as done through the mlx5 health recovery flow.
+
+Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1794,7 +1794,7 @@ static pci_ers_result_t mlx5_pci_err_det
+       mlx5_enter_error_state(dev, false);
+       mlx5_error_sw_reset(dev);
+-      mlx5_unload_one(dev, true);
++      mlx5_unload_one(dev, false);
+       mlx5_drain_health_wq(dev);
+       mlx5_pci_disable_device(dev);
diff --git a/queue-6.1/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch b/queue-6.1/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
new file mode 100644 (file)
index 0000000..3f33e13
--- /dev/null
@@ -0,0 +1,44 @@
+From d006207625657322ba8251b6e7e829f9659755dc Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Wed, 19 Jul 2023 11:33:44 +0300
+Subject: net/mlx5: Skip clock update work when device is in error state
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit d006207625657322ba8251b6e7e829f9659755dc upstream.
+
+When device is in error state, marked by the flag
+MLX5_DEVICE_STATE_INTERNAL_ERROR, the HW and PCI may not be accessible
+and so clock update work should be skipped. Furthermore, such access
+through PCI in error state, after calling mlx5_pci_disable_device() can
+result in failing to recover from pci errors.
+
+Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support")
+Reported-and-tested-by: Ganesh G R <ganeshgr@linux.ibm.com>
+Closes: https://lore.kernel.org/netdev/9bdb9b9d-140a-7a28-f0de-2e64e873c068@nvidia.com
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+@@ -198,10 +198,15 @@ static void mlx5_timestamp_overflow(stru
+       clock = container_of(timer, struct mlx5_clock, timer);
+       mdev = container_of(clock, struct mlx5_core_dev, clock);
++      if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++              goto out;
++
+       write_seqlock_irqsave(&clock->lock, flags);
+       timecounter_read(&timer->tc);
+       mlx5_update_clock_info_page(mdev);
+       write_sequnlock_irqrestore(&clock->lock, flags);
++
++out:
+       schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+ }
diff --git a/queue-6.1/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch b/queue-6.1/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
new file mode 100644 (file)
index 0000000..8aa3dff
--- /dev/null
@@ -0,0 +1,32 @@
+From d7791cec2304aea22eb2ada944e4d467302f5bfe Mon Sep 17 00:00:00 2001
+From: Li Yang <leoyang.li@nxp.com>
+Date: Wed, 2 Aug 2023 14:13:47 -0500
+Subject: net: phy: at803x: remove set/get wol callbacks for AR8032
+
+From: Li Yang <leoyang.li@nxp.com>
+
+commit d7791cec2304aea22eb2ada944e4d467302f5bfe upstream.
+
+Since the AR8032 part does not support wol, remove related callbacks
+from it.
+
+Fixes: 5800091a2061 ("net: phy: at803x: add support for AR8032 PHY")
+Signed-off-by: Li Yang <leoyang.li@nxp.com>
+Cc: David Bauer <mail@david-bauer.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -2087,8 +2087,6 @@ static struct phy_driver at803x_driver[]
+       .flags                  = PHY_POLL_CABLE_TEST,
+       .config_init            = at803x_config_init,
+       .link_change_notify     = at803x_link_change_notify,
+-      .set_wol                = at803x_set_wol,
+-      .get_wol                = at803x_get_wol,
+       .suspend                = at803x_suspend,
+       .resume                 = at803x_resume,
+       /* PHY_BASIC_FEATURES */
diff --git a/queue-6.1/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch b/queue-6.1/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
new file mode 100644 (file)
index 0000000..65dc802
--- /dev/null
@@ -0,0 +1,128 @@
+From 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:33 +0300
+Subject: nexthop: Fix infinite nexthop bucket dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop bucket dump callback always returns a positive number if
+nexthop buckets were filled in the provided skb, even if the dump is
+complete. This means that a dump will span at least two recvmsg() calls
+as long as nexthop buckets are present. In the last recvmsg() call the
+dump callback will not fill in any nexthop buckets because the previous
+call indicated that the dump should restart from the last dumped nexthop
+ID plus one.
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396980, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 128
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 6.66 nhid 1
+ id 10 index 1 idle_time 6.66 nhid 1
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # ip nexthop bucket
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ [...]
+
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOPBUCKET responses:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396737, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 148
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 148
+ id 4294967295 index 0 idle_time 6.61 nhid 1
+ id 4294967295 index 1 idle_time 6.61 nhid 1
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and simply reply with a
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump                                       [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump                                       [ OK ]
+ [...]
+
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-4-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c                          |    6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh |    5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3424,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struc
+       if (err < 0) {
+               if (likely(skb->len))
+-                      goto out;
+-              goto out_err;
++                      err = skb->len;
+       }
+-out:
+-      err = skb->len;
+-out_err:
+       cb->seq = net->nexthop.seq;
+       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+       return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -2206,6 +2206,11 @@ basic_res()
+       run_cmd "$IP nexthop bucket list fdb"
+       log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
++      # Dump should not loop endlessly when maximum nexthop ID is configured.
++      run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
++      run_cmd "timeout 5 $IP nexthop bucket"
++      log_test $? 0 "Maximum nexthop ID dump"
++
+       #
+       # resilient nexthop buckets get requests
+       #
diff --git a/queue-6.1/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch b/queue-6.1/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
new file mode 100644 (file)
index 0000000..f401b00
--- /dev/null
@@ -0,0 +1,119 @@
+From 913f60cacda73ccac8eead94983e5884c03e04cd Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:31 +0300
+Subject: nexthop: Fix infinite nexthop dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 913f60cacda73ccac8eead94983e5884c03e04cd upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop dump callback always returns a positive number if nexthops
+were filled in the provided skb, even if the dump is complete. This
+means that a dump will span at least two recvmsg() calls as long as
+nexthops are present. In the last recvmsg() call the dump callback will
+not fill in any nexthops because the previous call indicated that the
+dump should restart from the last dumped nexthop ID plus one.
+
+ # ip nexthop add id 1 blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394315, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 1], {nla_len=4, nla_type=NHA_BLACKHOLE}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
+ id 1 blackhole
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # ip nexthop
+ id 4294967295 blackhole
+ id 4294967295 blackhole
+ [...]
+
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOP response:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394080, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 56
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 4294967295], {nla_len=4, nla_type=NHA_BLACKHOLE}]], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 56
+ id 4294967295 blackhole
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and will simply reply with an
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump                                       [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump                                       [ OK ]
+ [...]
+
+Fixes: ab84be7e54fc ("net: Initial nexthop code")
+Reported-by: Petr Machata <petrm@nvidia.com>
+Closes: https://lore.kernel.org/netdev/87sf91enuf.fsf@nvidia.com/
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-2-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c                          |    6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh |    5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_bu
+                                    &rtm_dump_nexthop_cb, &filter);
+       if (err < 0) {
+               if (likely(skb->len))
+-                      goto out;
+-              goto out_err;
++                      err = skb->len;
+       }
+-out:
+-      err = skb->len;
+-out_err:
+       cb->seq = net->nexthop.seq;
+       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+       return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1981,6 +1981,11 @@ basic()
+       run_cmd "$IP link set dev lo up"
++      # Dump should not loop endlessly when maximum nexthop ID is configured.
++      run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
++      run_cmd "timeout 5 $IP nexthop"
++      log_test $? 0 "Maximum nexthop ID dump"
++
+       #
+       # groups
+       #
diff --git a/queue-6.1/nexthop-make-nexthop-bucket-dump-more-efficient.patch b/queue-6.1/nexthop-make-nexthop-bucket-dump-more-efficient.patch
new file mode 100644 (file)
index 0000000..cb3c7d8
--- /dev/null
@@ -0,0 +1,96 @@
+From f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:32 +0300
+Subject: nexthop: Make nexthop bucket dump more efficient
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 upstream.
+
+rtm_dump_nexthop_bucket_nh() is used to dump nexthop buckets belonging
+to a specific resilient nexthop group. The function returns a positive
+return code (the skb length) upon both success and failure.
+
+The above behavior is problematic. When a complete nexthop bucket dump
+is requested, the function that walks the different nexthops treats the
+non-zero return code as an error. This causes buckets belonging to
+different resilient nexthop groups to be dumped using different buffers
+even if they can all fit in the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 10 index 0 idle_time 10.27 nhid 1
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 20 index 0 idle_time 6.44 nhid 1
+ [...]
+
+Fix by only returning a non-zero return code when an error occurred and
+restarting the dump from the bucket index we failed to fill in. This
+allows buckets belonging to different resilient nexthop groups to be
+dumped using the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 30.21 nhid 1
+ id 20 index 0 idle_time 26.7 nhid 1
+ [...]
+
+While this change is more of a performance improvement than an actual
+bug fix, it is a prerequisite for a subsequent patch that does fix a
+bug.
+
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-3-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c |   16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3363,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(st
+                   dd->filter.res_bucket_nh_id != nhge->nh->id)
+                       continue;
++              dd->ctx->bucket_index = bucket_index;
+               err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+                                        RTM_NEWNEXTHOPBUCKET, portid,
+                                        cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                        cb->extack);
+-              if (err < 0) {
+-                      if (likely(skb->len))
+-                              goto out;
+-                      goto out_err;
+-              }
++              if (err)
++                      return err;
+       }
+       dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+-      bucket_index = 0;
++      dd->ctx->bucket_index = 0;
+-out:
+-      err = skb->len;
+-out_err:
+-      dd->ctx->bucket_index = bucket_index;
+-      return err;
++      return 0;
+ }
+ static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
index 2e9a8fd587d5c4d60bf63cd3e79d74cc84f9d2b4..48aa4306139daa478f543310c61ae5029183df89 100644 (file)
@@ -103,3 +103,25 @@ wifi-cfg80211-fix-sband-iftype-data-lookup-for-ap_vlan.patch
 rdma-umem-set-iova-in-odp-flow.patch
 net-tls-avoid-discarding-data-on-record-close.patch
 net-marvell-prestera-fix-handling-ipv4-routes-with-nhid.patch
+net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
+net-dsa-ocelot-call-dsa_tag_8021q_unregister-under-rtnl_lock-on-driver-remove.patch
+net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
+net-hns3-add-wait-until-mac-link-down.patch
+net-hns3-fix-deadlock-issue-when-externel_lb-and-reset-are-executed-together.patch
+nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
+nexthop-make-nexthop-bucket-dump-more-efficient.patch
+nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
+net-hns3-fix-strscpy-causing-content-truncation-issue.patch
+dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
+dmaengine-owl-dma-modify-mismatched-function-name.patch
+net-mlx5-allow-0-for-total-host-vfs.patch
+net-mlx5-lag-check-correct-bucket-when-modifying-lag.patch
+net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
+net-mlx5-reload-auxiliary-devices-in-pci-error-handlers.patch
+ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
+ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
+ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
+ibmvnic-do-partial-reset-on-login-failure.patch
+ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch
+gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch
+gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch