5.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)
diff --git a/queue-5.15/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch b/queue-5.15/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch

new file mode 100644 (file)

index 0000000..1e6d960
--- /dev/null
+++ b/queue-5.15/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
@@ -0,0 +1,61 @@
+From 0a46781c89dece85386885a407244ca26e5c1c44 Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Wed, 12 Jul 2023 18:26:45 +0530
+Subject: dmaengine: mcf-edma: Fix a potential un-allocated memory access
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+commit 0a46781c89dece85386885a407244ca26e5c1c44 upstream.
+
+When 'mcf_edma' is allocated, some space is allocated for a
+flexible array at the end of the struct. 'chans' items are allocated, that
+is to say 'pdata->dma_channels' items.
+
+Then, this number of items is stored in 'mcf_edma->n_chans'.
+
+A few lines later, if 'mcf_edma->n_chans' is 0, then a default value of 64
+is set.
+
+This results in no space being allocated by devm_kzalloc() because chans
+was 0, yet 64 items are read and/or written in unallocated memory.
+
+Change the logic to define a default value before allocating the memory.
+
+Fixes: e7a3ff92eaf1 ("dmaengine: fsl-edma: add ColdFire mcf5441x edma support")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Link: https://lore.kernel.org/r/f55d914407c900828f6fad3ea5fa791a5f17b9a4.1685172449.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/mcf-edma.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/dma/mcf-edma.c
++++ b/drivers/dma/mcf-edma.c
+@@ -191,7 +191,13 @@ static int mcf_edma_probe(struct platfor
+               return -EINVAL;
+       }
+ 
+-      chans = pdata->dma_channels;
++      if (!pdata->dma_channels) {
++              dev_info(&pdev->dev, "setting default channel number to 64");
++              chans = 64;
++      } else {
++              chans = pdata->dma_channels;
++      }
++
+       len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+       mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+       if (!mcf_edma)
+@@ -203,11 +209,6 @@ static int mcf_edma_probe(struct platfor
+       mcf_edma->drvdata = &mcf_data;
+       mcf_edma->big_endian = 1;
+ 
+-      if (!mcf_edma->n_chans) {
+-              dev_info(&pdev->dev, "setting default channel number to 64");
+-              mcf_edma->n_chans = 64;
+-      }
+-
+       mutex_init(&mcf_edma->fsl_edma_mutex);
+ 
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/queue-5.15/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch b/queue-5.15/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch

new file mode 100644 (file)

index 0000000..76a7845
--- /dev/null
+++ b/queue-5.15/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
@@ -0,0 +1,77 @@
+From db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:34 -0500
+Subject: ibmvnic: Enforce stronger sanity checks on login response
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit db17ba719bceb52f0ae4ebca0e4c17d9a3bebf05 upstream.
+
+Ensure that all offsets in a login response buffer are within the size
+of the allocated response buffer. Any offsets or lengths that surpass
+the allocation are likely the result of an incomplete response buffer.
+In these cases, a full reset is necessary.
+
+When attempting to login, the ibmvnic device will allocate a response
+buffer and pass a reference to the VIOS. The VIOS will then send the
+ibmvnic device a LOGIN_RSP CRQ to signal that the buffer has been filled
+with data. If the ibmvnic device does not get a response in 20 seconds,
+the old buffer is freed and a new login request is sent. With 2
+outstanding requests, any LOGIN_RSP CRQ's could be for the older
+login request. If this is the case then the login response buffer (which
+is for the newer login request) could be incomplete and contain invalid
+data. Therefore, we must enforce strict sanity checks on the response
+buffer values.
+
+Testing has shown that the `off_rxadd_buff_size` value is filled in last
+by the VIOS and will be the smoking gun for these circumstances.
+
+Until VIOS can implement a mechanism for tracking outstanding response
+buffers and a method for mapping a LOGIN_RSP CRQ to a particular login
+response buffer, the best ibmvnic can do in this situation is perform a
+full reset.
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-1-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -4788,6 +4788,7 @@ static int handle_login_rsp(union ibmvni
+       int num_tx_pools;
+       int num_rx_pools;
+       u64 *size_array;
++      u32 rsp_len;
+       int i;
+ 
+       /* CHECK: Test/set of login_pending does not need to be atomic
+@@ -4839,6 +4840,23 @@ static int handle_login_rsp(union ibmvni
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+               return -EIO;
+       }
++
++      rsp_len = be32_to_cpu(login_rsp->len);
++      if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
++          rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
++          rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
++              /* This can happen if a login request times out and there are
++               * 2 outstanding login requests sent, the LOGIN_RSP crq
++               * could have been for the older login request. So we are
++               * parsing the newer response buffer which may be incomplete
++               */
++              dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
++              ibmvnic_reset(adapter, VNIC_RESET_FATAL);
++              return -EIO;
++      }
++
+       size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+               be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
+       /* variable buffer sizes are not supported, so just read the
diff --git a/queue-5.15/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch b/queue-5.15/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch

new file mode 100644 (file)

index 0000000..44d5853
--- /dev/null
+++ b/queue-5.15/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
@@ -0,0 +1,73 @@
+From d78a671eb8996af19d6311ecdee9790d2fa479f0 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:36 -0500
+Subject: ibmvnic: Handle DMA unmapping of login buffs in release functions
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit d78a671eb8996af19d6311ecdee9790d2fa479f0 upstream.
+
+Rather than leaving the DMA unmapping of the login buffers to the
+login response handler, move this work into the login release functions.
+Previously, these functions were only used for freeing the allocated
+buffers. This could lead to issues if there is more than one
+outstanding login buffer request, which is possible if a login request
+times out.
+
+If a login request times out, then there is another call to send login.
+The send login function makes a call to the login buffer release
+function. In the past, this freed the buffers but did not DMA unmap.
+Therefore, the VIOS could still write to the old login (now freed)
+buffer. It is for this reason that it is a good idea to leave the DMA
+unmap call to the login buffers release function.
+
+Since the login buffer release functions now handle DMA unmapping,
+remove the duplicate DMA unmapping in handle_login_rsp().
+
+Fixes: dff515a3e71d ("ibmvnic: Harden device login requests")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-3-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -1007,12 +1007,22 @@ static int ibmvnic_login(struct net_devi
+ 
+ static void release_login_buffer(struct ibmvnic_adapter *adapter)
+ {
++      if (!adapter->login_buf)
++              return;
++
++      dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
++                       adapter->login_buf_sz, DMA_TO_DEVICE);
+       kfree(adapter->login_buf);
+       adapter->login_buf = NULL;
+ }
+ 
+ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
+ {
++      if (!adapter->login_rsp_buf)
++              return;
++
++      dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
++                       adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+       kfree(adapter->login_rsp_buf);
+       adapter->login_rsp_buf = NULL;
+ }
+@@ -4803,11 +4813,6 @@ static int handle_login_rsp(union ibmvni
+       }
+       adapter->login_pending = false;
+ 
+-      dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
+-                       DMA_TO_DEVICE);
+-      dma_unmap_single(dev, adapter->login_rsp_buf_token,
+-                       adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
+-
+       /* If the number of queues requested can't be allocated by the
+        * server, the login response will return with code 1. We will need
+        * to resend the login buffer with fewer queues requested.
diff --git a/queue-5.15/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch b/queue-5.15/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch

new file mode 100644 (file)

index 0000000..ac99f13
--- /dev/null
+++ b/queue-5.15/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
@@ -0,0 +1,41 @@
+From 411c565b4bc63e9584a8493882bd566e35a90588 Mon Sep 17 00:00:00 2001
+From: Nick Child <nnac123@linux.ibm.com>
+Date: Wed, 9 Aug 2023 17:10:35 -0500
+Subject: ibmvnic: Unmap DMA login rsp buffer on send login fail
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+commit 411c565b4bc63e9584a8493882bd566e35a90588 upstream.
+
+If the LOGIN CRQ fails to send then we must DMA unmap the response
+buffer. Previously, if the CRQ failed then the memory was freed without
+DMA unmapping.
+
+Fixes: c98d9cc4170d ("ibmvnic: send_login should check for crq errors")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230809221038.51296-2-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -4220,11 +4220,14 @@ static int send_login(struct ibmvnic_ada
+       if (rc) {
+               adapter->login_pending = false;
+               netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+-              goto buf_rsp_map_failed;
++              goto buf_send_failed;
+       }
+ 
+       return 0;
+ 
++buf_send_failed:
++      dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
++                       DMA_FROM_DEVICE);
+ buf_rsp_map_failed:
+       kfree(login_rsp_buffer);
+       adapter->login_rsp_buf = NULL;
diff --git a/queue-5.15/net-hns3-add-wait-until-mac-link-down.patch b/queue-5.15/net-hns3-add-wait-until-mac-link-down.patch

new file mode 100644 (file)

index 0000000..4f29712
--- /dev/null
+++ b/queue-5.15/net-hns3-add-wait-until-mac-link-down.patch
@@ -0,0 +1,54 @@
+From 6265e242f7b95f2c1195b42ec912b84ad161470e Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:51 +0800
+Subject: net: hns3: add wait until mac link down
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 6265e242f7b95f2c1195b42ec912b84ad161470e upstream.
+
+In some configuration flows of the hns3 driver, for example changing the
+MTU, the driver disables the MAC through firmware before configuration.
+But firmware disables the MAC asynchronously, so rx traffic may not stop.
+
+Fix it by waiting until the MAC link is down.
+
+Fixes: a9775bb64aa7 ("net: hns3: fix set and get link ksettings issue")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-4-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -7658,6 +7658,8 @@ static void hclge_enable_fd(struct hnae3
+ 
+ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+ {
++#define HCLGE_LINK_STATUS_WAIT_CNT  3
++
+       struct hclge_desc desc;
+       struct hclge_config_mac_mode_cmd *req =
+               (struct hclge_config_mac_mode_cmd *)desc.data;
+@@ -7682,9 +7684,15 @@ static void hclge_cfg_mac_mode(struct hc
+       req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+ 
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+-      if (ret)
++      if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "mac enable fail, ret =%d.\n", ret);
++              return;
++      }
++
++      if (!enable)
++              hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
++                                         HCLGE_LINK_STATUS_WAIT_CNT);
+ }
+ 
+ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
diff --git a/queue-5.15/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch b/queue-5.15/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch

new file mode 100644 (file)

index 0000000..eb45f67
--- /dev/null
+++ b/queue-5.15/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
@@ -0,0 +1,73 @@
+From 08469dacfad25428b66549716811807203744f4f Mon Sep 17 00:00:00 2001
+From: Jie Wang <wangjie125@huawei.com>
+Date: Mon, 7 Aug 2023 19:34:50 +0800
+Subject: net: hns3: refactor hclge_mac_link_status_wait for interface reuse
+
+From: Jie Wang <wangjie125@huawei.com>
+
+commit 08469dacfad25428b66549716811807203744f4f upstream.
+
+Some NIC configurations can only be performed after the link is down. So
+this patch refactors this API for reuse.
+
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230807113452.474224-3-shaojijie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -70,6 +70,8 @@ static void hclge_sync_mac_table(struct
+ static void hclge_restore_hw_table(struct hclge_dev *hdev);
+ static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+ static void hclge_sync_fd_table(struct hclge_dev *hdev);
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++                                    int wait_cnt);
+ 
+ static struct hnae3_ae_algo ae_algo;
+ 
+@@ -7745,10 +7747,9 @@ static void hclge_phy_link_status_wait(s
+       } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
+ }
+ 
+-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
++static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
++                                    int wait_cnt)
+ {
+-#define HCLGE_MAC_LINK_STATUS_NUM  100
+-
+       int link_status;
+       int i = 0;
+       int ret;
+@@ -7761,13 +7762,15 @@ static int hclge_mac_link_status_wait(st
+                       return 0;
+ 
+               msleep(HCLGE_LINK_STATUS_MS);
+-      } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
++      } while (++i < wait_cnt);
+       return -EBUSY;
+ }
+ 
+ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
+                                         bool is_phy)
+ {
++#define HCLGE_MAC_LINK_STATUS_NUM  100
++
+       int link_ret;
+ 
+       link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
+@@ -7775,7 +7778,8 @@ static int hclge_mac_phy_link_status_wai
+       if (is_phy)
+               hclge_phy_link_status_wait(hdev, link_ret);
+ 
+-      return hclge_mac_link_status_wait(hdev, link_ret);
++      return hclge_mac_link_status_wait(hdev, link_ret,
++                                        HCLGE_MAC_LINK_STATUS_NUM);
+ }
+ 
+ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
diff --git a/queue-5.15/net-mlx5-allow-0-for-total-host-vfs.patch b/queue-5.15/net-mlx5-allow-0-for-total-host-vfs.patch

new file mode 100644 (file)

index 0000000..e10b450
--- /dev/null
+++ b/queue-5.15/net-mlx5-allow-0-for-total-host-vfs.patch
@@ -0,0 +1,33 @@
+From 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f Mon Sep 17 00:00:00 2001
+From: Daniel Jurgens <danielj@nvidia.com>
+Date: Tue, 11 Jul 2023 00:28:10 +0300
+Subject: net/mlx5: Allow 0 for total host VFs
+
+From: Daniel Jurgens <danielj@nvidia.com>
+
+commit 2dc2b3922d3c0f52d3a792d15dcacfbc4cc76b8f upstream.
+
+When querying eswitch functions, 0 is a valid number of host VFs. After
+introducing ARM SRIOV, falling through to getting the max value from PCI
+results in using the total VFs allowed on the ARM for the host.
+
+Fixes: 86eec50beaf3 ("net/mlx5: Support querying max VFs from device")
+Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/sriov.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+@@ -256,8 +256,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_
+               host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+                                         host_params_context.host_total_vfs);
+               kvfree(out);
+-              if (host_total_vfs)
+-                      return host_total_vfs;
++              return host_total_vfs;
+       }
+ 
+ done:
diff --git a/queue-5.15/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch b/queue-5.15/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch

new file mode 100644 (file)

index 0000000..6160f54
--- /dev/null
+++ b/queue-5.15/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
@@ -0,0 +1,44 @@
+From d006207625657322ba8251b6e7e829f9659755dc Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Wed, 19 Jul 2023 11:33:44 +0300
+Subject: net/mlx5: Skip clock update work when device is in error state
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit d006207625657322ba8251b6e7e829f9659755dc upstream.
+
+When device is in error state, marked by the flag
+MLX5_DEVICE_STATE_INTERNAL_ERROR, the HW and PCI may not be accessible
+and so clock update work should be skipped. Furthermore, such access
+through PCI in error state, after calling mlx5_pci_disable_device() can
+result in failing to recover from pci errors.
+
+Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support")
+Reported-and-tested-by: Ganesh G R <ganeshgr@linux.ibm.com>
+Closes: https://lore.kernel.org/netdev/9bdb9b9d-140a-7a28-f0de-2e64e873c068@nvidia.com
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+@@ -189,10 +189,15 @@ static void mlx5_timestamp_overflow(stru
+       clock = container_of(timer, struct mlx5_clock, timer);
+       mdev = container_of(clock, struct mlx5_core_dev, clock);
+ 
++      if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++              goto out;
++
+       write_seqlock_irqsave(&clock->lock, flags);
+       timecounter_read(&timer->tc);
+       mlx5_update_clock_info_page(mdev);
+       write_sequnlock_irqrestore(&clock->lock, flags);
++
++out:
+       schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+ }
+ 
diff --git a/queue-5.15/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch b/queue-5.15/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch

new file mode 100644 (file)

index 0000000..e92f544
--- /dev/null
+++ b/queue-5.15/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
@@ -0,0 +1,32 @@
+From d7791cec2304aea22eb2ada944e4d467302f5bfe Mon Sep 17 00:00:00 2001
+From: Li Yang <leoyang.li@nxp.com>
+Date: Wed, 2 Aug 2023 14:13:47 -0500
+Subject: net: phy: at803x: remove set/get wol callbacks for AR8032
+
+From: Li Yang <leoyang.li@nxp.com>
+
+commit d7791cec2304aea22eb2ada944e4d467302f5bfe upstream.
+
+Since the AR8032 part does not support wol, remove related callbacks
+from it.
+
+Fixes: 5800091a2061 ("net: phy: at803x: add support for AR8032 PHY")
+Signed-off-by: Li Yang <leoyang.li@nxp.com>
+Cc: David Bauer <mail@david-bauer.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/at803x.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/phy/at803x.c
++++ b/drivers/net/phy/at803x.c
+@@ -1375,8 +1375,6 @@ static struct phy_driver at803x_driver[]
+       .flags                  = PHY_POLL_CABLE_TEST,
+       .config_init            = at803x_config_init,
+       .link_change_notify     = at803x_link_change_notify,
+-      .set_wol                = at803x_set_wol,
+-      .get_wol                = at803x_get_wol,
+       .suspend                = at803x_suspend,
+       .resume                 = at803x_resume,
+       /* PHY_BASIC_FEATURES */
diff --git a/queue-5.15/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch b/queue-5.15/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch

new file mode 100644 (file)

index 0000000..e479cb8
--- /dev/null
+++ b/queue-5.15/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
@@ -0,0 +1,128 @@
+From 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:33 +0300
+Subject: nexthop: Fix infinite nexthop bucket dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 8743aeff5bc4dcb5b87b43765f48d5ac3ad7dd9f upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop bucket dump callback always returns a positive number if
+nexthop buckets were filled in the provided skb, even if the dump is
+complete. This means that a dump will span at least two recvmsg() calls
+as long as nexthop buckets are present. In the last recvmsg() call the
+dump callback will not fill in any nexthop buckets because the previous
+call indicated that the dump should restart from the last dumped nexthop
+ID plus one.
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396980, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 128
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 6.66 nhid 1
+ id 10 index 1 idle_time 6.66 nhid 1
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396980, nlmsg_pid=347}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # ip nexthop bucket
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ id 4294967295 index 0 idle_time 5.55 nhid 1
+ id 4294967295 index 1 idle_time 5.55 nhid 1
+ [...]
+
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOPBUCKET responses:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id $((2**32-1)) group 1 type resilient buckets 2
+ # strace -e sendto,recvmsg -s 5 ip nexthop bucket
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOPBUCKET, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691396737, nlmsg_pid=0}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 148
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=64, nlmsg_type=RTM_NEWNEXTHOPBUCKET, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, {family=AF_UNSPEC, data="\x00\x00\x00\x00\x00"...}], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691396737, nlmsg_pid=350}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 148
+ id 4294967295 index 0 idle_time 6.61 nhid 1
+ id 4294967295 index 1 idle_time 6.61 nhid 1
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and simply reply with a
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump                                       [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic_res
+ [...]
+ TEST: Maximum nexthop ID dump                                       [ OK ]
+ [...]
+
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-4-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c                          |    6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh |    5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3425,13 +3425,9 @@ static int rtm_dump_nexthop_bucket(struc
+ 
+       if (err < 0) {
+               if (likely(skb->len))
+-                      goto out;
+-              goto out_err;
++                      err = skb->len;
+       }
+ 
+-out:
+-      err = skb->len;
+-out_err:
+       cb->seq = net->nexthop.seq;
+       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+       return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -2142,6 +2142,11 @@ basic_res()
+       run_cmd "$IP nexthop bucket list fdb"
+       log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+ 
++      # Dump should not loop endlessly when maximum nexthop ID is configured.
++      run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
++      run_cmd "timeout 5 $IP nexthop bucket"
++      log_test $? 0 "Maximum nexthop ID dump"
++
+       #
+       # resilient nexthop buckets get requests
+       #
diff --git a/queue-5.15/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch b/queue-5.15/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch

new file mode 100644 (file)

index 0000000..a24bab0
--- /dev/null
+++ b/queue-5.15/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
@@ -0,0 +1,119 @@
+From 913f60cacda73ccac8eead94983e5884c03e04cd Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:31 +0300
+Subject: nexthop: Fix infinite nexthop dump when using maximum nexthop ID
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit 913f60cacda73ccac8eead94983e5884c03e04cd upstream.
+
+A netlink dump callback can return a positive number to signal that more
+information needs to be dumped or zero to signal that the dump is
+complete. In the second case, the core netlink code will append the
+NLMSG_DONE message to the skb in order to indicate to user space that
+the dump is complete.
+
+The nexthop dump callback always returns a positive number if nexthops
+were filled in the provided skb, even if the dump is complete. This
+means that a dump will span at least two recvmsg() calls as long as
+nexthops are present. In the last recvmsg() call the dump callback will
+not fill in any nexthops because the previous call indicated that the
+dump should restart from the last dumped nexthop ID plus one.
+
+ # ip nexthop add id 1 blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394315, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 1], {nla_len=4, nla_type=NHA_BLACKHOLE}]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
+ id 1 blackhole
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394315, nlmsg_pid=343}, 0], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
+ +++ exited with 0 +++
+
+This behavior is both inefficient and buggy. If the last nexthop to be
+dumped had the maximum ID of 0xffffffff, then the dump will restart from
+0 (0xffffffff + 1) and never end:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # ip nexthop
+ id 4294967295 blackhole
+ id 4294967295 blackhole
+ [...]
+
+Fix by adjusting the dump callback to return zero when the dump is
+complete. After the fix only one recvmsg() call is made and the
+NLMSG_DONE message is appended to the RTM_NEWNEXTHOP response:
+
+ # ip nexthop add id $((2**32-1)) blackhole
+ # strace -e sendto,recvmsg -s 5 ip nexthop
+ sendto(3, [[{nlmsg_len=24, nlmsg_type=RTM_GETNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1691394080, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}], {nlmsg_len=0, nlmsg_type=0 /* NLMSG_??? */, nlmsg_flags=0, nlmsg_seq=0, nlmsg_pid=0}], 152, 0, NULL, 0) = 152
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 56
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=36, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 4294967295], {nla_len=4, nla_type=NHA_BLACKHOLE}]], [{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1691394080, nlmsg_pid=342}, 0]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 56
+ id 4294967295 blackhole
+ +++ exited with 0 +++
+
+Note that if the NLMSG_DONE message cannot be appended because of size
+limitations, then another recvmsg() will be needed, but the core netlink
+code will not invoke the dump callback and will simply reply with an
+NLMSG_DONE message since it knows that the callback previously returned
+zero.
+
+Add a test that fails before the fix:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump                                       [FAIL]
+ [...]
+
+And passes after it:
+
+ # ./fib_nexthops.sh -t basic
+ [...]
+ TEST: Maximum nexthop ID dump                                       [ OK ]
+ [...]
+
+Fixes: ab84be7e54fc ("net: Initial nexthop code")
+Reported-by: Petr Machata <petrm@nvidia.com>
+Closes: https://lore.kernel.org/netdev/87sf91enuf.fsf@nvidia.com/
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-2-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c                          |    6 +-----
+ tools/testing/selftests/net/fib_nexthops.sh |    5 +++++
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3222,13 +3222,9 @@ static int rtm_dump_nexthop(struct sk_bu
+                                    &rtm_dump_nexthop_cb, &filter);
+       if (err < 0) {
+               if (likely(skb->len))
+-                      goto out;
+-              goto out_err;
++                      err = skb->len;
+       }
+ 
+-out:
+-      err = skb->len;
+-out_err:
+       cb->seq = net->nexthop.seq;
+       nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+       return err;
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1917,6 +1917,11 @@ basic()
+ 
+       run_cmd "$IP link set dev lo up"
+ 
++      # Dump should not loop endlessly when maximum nexthop ID is configured.
++      run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
++      run_cmd "timeout 5 $IP nexthop"
++      log_test $? 0 "Maximum nexthop ID dump"
++
+       #
+       # groups
+       #
diff --git a/queue-5.15/nexthop-make-nexthop-bucket-dump-more-efficient.patch b/queue-5.15/nexthop-make-nexthop-bucket-dump-more-efficient.patch

new file mode 100644 (file)

index 0000000..74da045
--- /dev/null
+++ b/queue-5.15/nexthop-make-nexthop-bucket-dump-more-efficient.patch
@@ -0,0 +1,96 @@
+From f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 Mon Sep 17 00:00:00 2001
+From: Ido Schimmel <idosch@nvidia.com>
+Date: Tue, 8 Aug 2023 10:52:32 +0300
+Subject: nexthop: Make nexthop bucket dump more efficient
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+commit f10d3d9df49d9e6ee244fda6ca264f901a9c5d85 upstream.
+
+rtm_dump_nexthop_bucket_nh() is used to dump nexthop buckets belonging
+to a specific resilient nexthop group. The function returns a positive
+return code (the skb length) upon both success and failure.
+
+The above behavior is problematic. When a complete nexthop bucket dump
+is requested, the function that walks the different nexthops treats the
+non-zero return code as an error. This causes buckets belonging to
+different resilient nexthop groups to be dumped using different buffers
+even if they can all fit in the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 10 index 0 idle_time 10.27 nhid 1
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64
+ id 20 index 0 idle_time 6.44 nhid 1
+ [...]
+
+Fix by only returning a non-zero return code when an error occurred and
+restarting the dump from the bucket index we failed to fill in. This
+allows buckets belonging to different resilient nexthop groups to be
+dumped using the same buffer:
+
+ # ip link add name dummy1 up type dummy
+ # ip nexthop add id 1 dev dummy1
+ # ip nexthop add id 10 group 1 type resilient buckets 1
+ # ip nexthop add id 20 group 1 type resilient buckets 1
+ # strace -e recvmsg -s 0 ip nexthop bucket
+ [...]
+ recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[...], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 128
+ id 10 index 0 idle_time 30.21 nhid 1
+ id 20 index 0 idle_time 26.7 nhid 1
+ [...]
+
+While this change is more of a performance improvement than an actual
+bug fix, it is a prerequisite for a subsequent patch that does fix a
+bug.
+
+Fixes: 8a1bbabb034d ("nexthop: Add netlink handlers for bucket dump")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230808075233.3337922-3-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/nexthop.c |   16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+--- a/net/ipv4/nexthop.c
++++ b/net/ipv4/nexthop.c
+@@ -3364,25 +3364,19 @@ static int rtm_dump_nexthop_bucket_nh(st
+                   dd->filter.res_bucket_nh_id != nhge->nh->id)
+                       continue;
+ 
++              dd->ctx->bucket_index = bucket_index;
+               err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+                                        RTM_NEWNEXTHOPBUCKET, portid,
+                                        cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                        cb->extack);
+-              if (err < 0) {
+-                      if (likely(skb->len))
+-                              goto out;
+-                      goto out_err;
+-              }
++              if (err)
++                      return err;
+       }
+ 
+       dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+-      bucket_index = 0;
++      dd->ctx->bucket_index = 0;
+ 
+-out:
+-      err = skb->len;
+-out_err:
+-      dd->ctx->bucket_index = bucket_index;
+-      return err;
++      return 0;
+ }
+ 
+ static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
diff --git a/queue-5.15/series b/queue-5.15/series

index c897dd71adf725811cb11dc4ac18fd9be9639da6..d8ca87dafc9aeafbe63b0230f0e9843e444874a8 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -56,3 +56,15 @@ ib-hfi1-fix-possible-panic-during-hotplug-remove.patch
  drm-rockchip-don-t-spam-logs-in-atomic-check.patch
  wifi-cfg80211-fix-sband-iftype-data-lookup-for-ap_vlan.patch
  rdma-umem-set-iova-in-odp-flow.patch
+net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch
+net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch
+net-hns3-add-wait-until-mac-link-down.patch
+nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch
+nexthop-make-nexthop-bucket-dump-more-efficient.patch
+nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch
+dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch
+net-mlx5-allow-0-for-total-host-vfs.patch
+net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
+ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
+ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
+ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 12 Aug 2023 18:52:18 +0000 (20:52 +0200)
queue-5.15/dmaengine-mcf-edma-fix-a-potential-un-allocated-memory-access.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-hns3-add-wait-until-mac-link-down.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-hns3-refactor-hclge_mac_link_status_wait-for-interface-reuse.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-mlx5-allow-0-for-total-host-vfs.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/net-phy-at803x-remove-set-get-wol-callbacks-for-ar8032.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/nexthop-fix-infinite-nexthop-bucket-dump-when-using-maximum-nexthop-id.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/nexthop-fix-infinite-nexthop-dump-when-using-maximum-nexthop-id.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/nexthop-make-nexthop-bucket-dump-more-efficient.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history