]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
authorSasha Levin <sashal@kernel.org>
Sun, 2 Jan 2022 21:57:04 +0000 (16:57 -0500)
committerSasha Levin <sashal@kernel.org>
Sun, 2 Jan 2022 21:57:04 +0000 (16:57 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
36 files changed:
queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch [new file with mode: 0644]
queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch [new file with mode: 0644]
queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch [new file with mode: 0644]
queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch [new file with mode: 0644]
queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch [new file with mode: 0644]
queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch [new file with mode: 0644]
queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch [new file with mode: 0644]
queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch [new file with mode: 0644]
queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch [new file with mode: 0644]
queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch [new file with mode: 0644]
queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch [new file with mode: 0644]
queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch [new file with mode: 0644]
queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch [new file with mode: 0644]
queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch [new file with mode: 0644]
queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch [new file with mode: 0644]
queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch [new file with mode: 0644]
queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch [new file with mode: 0644]
queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch [new file with mode: 0644]
queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch [new file with mode: 0644]
queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch [new file with mode: 0644]
queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch [new file with mode: 0644]
queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch [new file with mode: 0644]
queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch [new file with mode: 0644]
queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch [new file with mode: 0644]
queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch [new file with mode: 0644]
queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch [new file with mode: 0644]
queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch [new file with mode: 0644]
queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch [new file with mode: 0644]
queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch [new file with mode: 0644]

diff --git a/queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch b/queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch
new file mode 100644 (file)
index 0000000..0c274f3
--- /dev/null
@@ -0,0 +1,45 @@
+From 0e39974b65cf94a71260ccf06b0ebabd1dd77272 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Dec 2021 13:53:36 -0500
+Subject: drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization
+
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+
+[ Upstream commit a07f8b9983543d465b50870ab4f845d4d710ed3f ]
+
+[Why]
+Otherwise SMU won't mark Display as idle when trying to perform s2idle.
+
+[How]
+Mark the bit in the dcn31 codepath, doesn't apply to older ASIC.
+
+It needed to be split from phy refclk off to prevent entering s2idle
+when PSR was engaged but driver was not ready.
+
+Fixes: 118a33151658 ("drm/amd/display: Add DCN3.1 clock manager support")
+
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Reviewed-by: Eric Yang <Eric.Yang2@amd.com>
+Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+index 377c4e53a2b37..407e19412a949 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+@@ -157,6 +157,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
+                               union display_idle_optimization_u idle_info = { 0 };
+                               idle_info.idle_info.df_request_disabled = 1;
+                               idle_info.idle_info.phy_ref_clk_off = 1;
++                              idle_info.idle_info.s0i2_rdy = 1;
+                               dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+                               /* update power state */
+                               clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+-- 
+2.34.1
+
diff --git a/queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch b/queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch
new file mode 100644 (file)
index 0000000..e18ac64
--- /dev/null
@@ -0,0 +1,47 @@
+From b82599716deab4c90ff6873216c3e073f05f827a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Dec 2021 16:05:36 -0500
+Subject: drm/amd/display: Set optimize_pwr_state for DCN31
+
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+
+[ Upstream commit 33735c1c8d0223170d79dbe166976d9cd7339c7a ]
+
+[Why]
+We'll exit optimized power state to do link detection but we won't enter
+back into the optimized power state.
+
+This could potentially block s2idle entry depending on the sequencing,
+but it also means we're losing some power during the transition period.
+
+[How]
+Hook up the handler like DCN21. It was also missed like the
+exit_optimized_pwr_state callback.
+
+Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ")
+
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Reviewed-by: Eric Yang <Eric.Yang2@amd.com>
+Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
+Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+index ac8fb202fd5ee..4e9fe090b770a 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+@@ -100,6 +100,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
+       .z10_save_init = dcn31_z10_save_init,
+       .is_abm_supported = dcn31_is_abm_supported,
+       .set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
++      .optimize_pwr_state = dcn21_optimize_pwr_state,
+       .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+       .update_visual_confirm_color = dcn20_update_visual_confirm_color,
+ };
+-- 
+2.34.1
+
diff --git a/queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch b/queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch
new file mode 100644 (file)
index 0000000..76a4c64
--- /dev/null
@@ -0,0 +1,82 @@
+From ffa66093d553c6b3f173dc76f2d4b89035d526f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Dec 2021 12:26:27 +0000
+Subject: fsl/fman: Fix missing put_device() call in fman_port_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit bf2b09fedc17248b315f80fb249087b7d28a69a6 ]
+
+The reference taken by 'of_find_device_by_node()' must be released when
+not needed anymore.
+Add the corresponding 'put_device()' in the and error handling paths.
+
+Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
+index d9baac0dbc7d0..4c9d05c45c033 100644
+--- a/drivers/net/ethernet/freescale/fman/fman_port.c
++++ b/drivers/net/ethernet/freescale/fman/fman_port.c
+@@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+       fman = dev_get_drvdata(&fm_pdev->dev);
+       if (!fman) {
+               err = -EINVAL;
+-              goto return_err;
++              goto put_device;
+       }
+       err = of_property_read_u32(port_node, "cell-index", &val);
+@@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+               dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
+                       __func__, port_node);
+               err = -EINVAL;
+-              goto return_err;
++              goto put_device;
+       }
+       port_id = (u8)val;
+       port->dts_params.id = port_id;
+@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+       }  else {
+               dev_err(port->dev, "%s: Illegal port type\n", __func__);
+               err = -EINVAL;
+-              goto return_err;
++              goto put_device;
+       }
+       port->dts_params.type = port_type;
+@@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+                       dev_err(port->dev, "%s: incorrect qman-channel-id\n",
+                               __func__);
+                       err = -EINVAL;
+-                      goto return_err;
++                      goto put_device;
+               }
+               port->dts_params.qman_channel_id = qman_channel_id;
+       }
+@@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev)
+               dev_err(port->dev, "%s: of_address_to_resource() failed\n",
+                       __func__);
+               err = -ENOMEM;
+-              goto return_err;
++              goto put_device;
+       }
+       port->dts_params.fman = fman;
+@@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev)
+       return 0;
++put_device:
++      put_device(&fm_pdev->dev);
+ return_err:
+       of_node_put(port_node);
+ free_port:
+-- 
+2.34.1
+
diff --git a/queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch b/queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch
new file mode 100644 (file)
index 0000000..90893ef
--- /dev/null
@@ -0,0 +1,56 @@
+From 20ec2803317bb1fe0fd98e87d3accdfb82d14a3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Dec 2021 16:39:49 -0800
+Subject: igc: Do not enable crosstimestamping for i225-V models
+
+From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+
+[ Upstream commit 1e81dcc1ab7de7a789e60042ce82d5a612632599 ]
+
+It was reported that when PCIe PTM is enabled, some lockups could
+be observed with some integrated i225-V models.
+
+While the issue is investigated, we can disable crosstimestamp for
+those models and see no loss of functionality, because those models
+don't have any support for time synchronization.
+
+Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()")
+Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/
+Reported-by: Stefan Dietrich <roots@gmx.de>
+Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Nechama Kraus <nechamax.kraus@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
+index 30568e3544cda..4f9245aa79a18 100644
+--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
+@@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
+  */
+ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
+ {
+-      return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false;
++      if (!IS_ENABLED(CONFIG_X86_TSC))
++              return false;
++
++      /* FIXME: it was noticed that enabling support for PCIe PTM in
++       * some i225-V models could cause lockups when bringing the
++       * interface up/down. There should be no downsides to
++       * disabling crosstimestamping support for i225-V, as it
++       * doesn't have any PTP support. That way we gain some time
++       * while root causing the issue.
++       */
++      if (adapter->pdev->device == IGC_DEV_ID_I225_V)
++              return false;
++
++      return pcie_ptm_enabled(adapter->pdev);
+ }
+ static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
+-- 
+2.34.1
+
diff --git a/queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch b/queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch
new file mode 100644 (file)
index 0000000..877e9cb
--- /dev/null
@@ -0,0 +1,48 @@
+From 505fabb777c6ffdfd7ef2606ded3361a6f3505d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Dec 2021 16:49:33 -0700
+Subject: igc: Fix TX timestamp support for non-MSI-X platforms
+
+From: James McLaughlin <james.mclaughlin@qsc.com>
+
+[ Upstream commit f85846bbf43de38fb2c89fe7d2a085608c4eb25a ]
+
+Time synchronization was not properly enabled on non-MSI-X platforms.
+
+Fixes: 2c344ae24501 ("igc: Add support for TX timestamping")
+Signed-off-by: James McLaughlin <james.mclaughlin@qsc.com>
+Reviewed-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Tested-by: Nechama Kraus <nechamax.kraus@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 0e19b4d02e628..0a96627391a8c 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -5466,6 +5466,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
+                       mod_timer(&adapter->watchdog_timer, jiffies + 1);
+       }
++      if (icr & IGC_ICR_TS)
++              igc_tsync_interrupt(adapter);
++
+       napi_schedule(&q_vector->napi);
+       return IRQ_HANDLED;
+@@ -5509,6 +5512,9 @@ static irqreturn_t igc_intr(int irq, void *data)
+                       mod_timer(&adapter->watchdog_timer, jiffies + 1);
+       }
++      if (icr & IGC_ICR_TS)
++              igc_tsync_interrupt(adapter);
++
+       napi_schedule(&q_vector->napi);
+       return IRQ_HANDLED;
+-- 
+2.34.1
+
diff --git a/queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch b/queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch
new file mode 100644 (file)
index 0000000..48a54c8
--- /dev/null
@@ -0,0 +1,40 @@
+From 0ada5a1c5013662da138c2ad161989ef0703a083 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Dec 2021 15:06:17 +0100
+Subject: ionic: Initialize the 'lif->dbid_inuse' bitmap
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 140c7bc7d1195750342ea0e6ab76179499ae7cd7 ]
+
+When allocated, this bitmap is not initialized. Only the first bit is set a
+few lines below.
+
+Use bitmap_zalloc() to make sure that it is cleared before being used.
+
+Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: Shannon Nelson <snelson@pensando.io>
+Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+index 7f3322ce044c7..6ac507ddf09af 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+@@ -3283,7 +3283,7 @@ int ionic_lif_init(struct ionic_lif *lif)
+               return -EINVAL;
+       }
+-      lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
++      lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
+       if (!lif->dbid_inuse) {
+               dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
+               return -ENOMEM;
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch b/queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch
new file mode 100644 (file)
index 0000000..bae119a
--- /dev/null
@@ -0,0 +1,98 @@
+From 0378ead13b77522d1402aa2ebfdd3e9574dc2cf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Dec 2021 18:51:44 +0100
+Subject: net: ag71xx: Fix a potential double free in error handling paths
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd ]
+
+'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there
+is no need to call free_netdev() explicitly or there will be a double
+free().
+
+Simplify all error handling paths accordingly.
+
+Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++---------------
+ 1 file changed, 8 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
+index 02ae98aabf91c..416a5c99db5a2 100644
+--- a/drivers/net/ethernet/atheros/ag71xx.c
++++ b/drivers/net/ethernet/atheros/ag71xx.c
+@@ -1915,15 +1915,12 @@ static int ag71xx_probe(struct platform_device *pdev)
+       ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
+       if (IS_ERR(ag->mac_reset)) {
+               netif_err(ag, probe, ndev, "missing mac reset\n");
+-              err = PTR_ERR(ag->mac_reset);
+-              goto err_free;
++              return PTR_ERR(ag->mac_reset);
+       }
+       ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+-      if (!ag->mac_base) {
+-              err = -ENOMEM;
+-              goto err_free;
+-      }
++      if (!ag->mac_base)
++              return -ENOMEM;
+       ndev->irq = platform_get_irq(pdev, 0);
+       err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
+@@ -1931,7 +1928,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+       if (err) {
+               netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
+                         ndev->irq);
+-              goto err_free;
++              return err;
+       }
+       ndev->netdev_ops = &ag71xx_netdev_ops;
+@@ -1959,10 +1956,8 @@ static int ag71xx_probe(struct platform_device *pdev)
+       ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
+                                           sizeof(struct ag71xx_desc),
+                                           &ag->stop_desc_dma, GFP_KERNEL);
+-      if (!ag->stop_desc) {
+-              err = -ENOMEM;
+-              goto err_free;
+-      }
++      if (!ag->stop_desc)
++              return -ENOMEM;
+       ag->stop_desc->data = 0;
+       ag->stop_desc->ctrl = 0;
+@@ -1977,7 +1972,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+       err = of_get_phy_mode(np, &ag->phy_if_mode);
+       if (err) {
+               netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
+-              goto err_free;
++              return err;
+       }
+       netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
+@@ -1985,7 +1980,7 @@ static int ag71xx_probe(struct platform_device *pdev)
+       err = clk_prepare_enable(ag->clk_eth);
+       if (err) {
+               netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
+-              goto err_free;
++              return err;
+       }
+       ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
+@@ -2021,8 +2016,6 @@ err_mdio_remove:
+       ag71xx_mdio_remove(ag);
+ err_put_clk:
+       clk_disable_unprepare(ag->clk_eth);
+-err_free:
+-      free_netdev(ndev);
+       return err;
+ }
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch b/queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch
new file mode 100644 (file)
index 0000000..416487e
--- /dev/null
@@ -0,0 +1,122 @@
+From 7909cb874988801170bd4382d2b5e9cdcbfbf4ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Dec 2021 19:21:15 +0200
+Subject: net: bridge: mcast: add and enforce query interval minimum
+
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+
+[ Upstream commit 99b40610956a8a8755653a67392e2a8b772453be ]
+
+As reported[1] if query interval is set too low and we have multiple
+bridges or even a single bridge with multiple querier vlans configured
+we can crash the machine. Add a 1 second minimum which must be enforced
+by overwriting the value if set lower (i.e. without returning an error) to
+avoid breaking user-space. If that happens a log message is emitted to let
+the administrator know that the interval has been set to the minimum.
+The issue has been present since these intervals could be user-controlled.
+
+[1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/
+
+Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries")
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_multicast.c    | 16 ++++++++++++++++
+ net/bridge/br_netlink.c      |  2 +-
+ net/bridge/br_private.h      |  3 +++
+ net/bridge/br_sysfs_br.c     |  2 +-
+ net/bridge/br_vlan_options.c |  2 +-
+ 5 files changed, 22 insertions(+), 3 deletions(-)
+
+diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
+index f3d751105343c..998da4a2d2092 100644
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ }
+ #endif
++void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
++                                unsigned long val)
++{
++      unsigned long intvl_jiffies = clock_t_to_jiffies(val);
++
++      if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) {
++              br_info(brmctx->br,
++                      "trying to set multicast query interval below minimum, setting to %lu (%ums)\n",
++                      jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN),
++                      jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN));
++              intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN;
++      }
++
++      brmctx->multicast_query_interval = intvl_jiffies;
++}
++
+ /**
+  * br_multicast_list_adjacent - Returns snooped multicast addresses
+  * @dev:      The bridge port adjacent to which to retrieve addresses
+diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
+index 5c6c4305ed235..09812df3bc91d 100644
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
+       if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
+               u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
+-              br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++              br_multicast_set_query_intvl(&br->multicast_ctx, val);
+       }
+       if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index fd5e7e74573ce..30c9411bfb646 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -28,6 +28,7 @@
+ #define BR_MAX_PORTS  (1<<BR_PORT_BITS)
+ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096
++#define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000)
+ #define BR_HWDOM_MAX BITS_PER_LONG
+@@ -968,6 +969,8 @@ int br_multicast_dump_querier_state(struct sk_buff *skb,
+                                   int nest_attr);
+ size_t br_multicast_querier_state_size(void);
+ size_t br_rports_size(const struct net_bridge_mcast *brmctx);
++void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
++                                unsigned long val);
+ static inline bool br_group_is_l2(const struct br_ip *group)
+ {
+diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
+index d9a89ddd03310..f5bd1114a434d 100644
+--- a/net/bridge/br_sysfs_br.c
++++ b/net/bridge/br_sysfs_br.c
+@@ -658,7 +658,7 @@ static ssize_t multicast_query_interval_show(struct device *d,
+ static int set_query_interval(struct net_bridge *br, unsigned long val,
+                             struct netlink_ext_ack *extack)
+ {
+-      br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++      br_multicast_set_query_intvl(&br->multicast_ctx, val);
+       return 0;
+ }
+diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
+index 8ffd4ed2563c6..bf1ac08742794 100644
+--- a/net/bridge/br_vlan_options.c
++++ b/net/bridge/br_vlan_options.c
+@@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+               u64 val;
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
+-              v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
++              br_multicast_set_query_intvl(&v->br_mcast_ctx, val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch b/queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch
new file mode 100644 (file)
index 0000000..477e39a
--- /dev/null
@@ -0,0 +1,125 @@
+From 8789201d2750332a0b4b18aa7a8fd2e2459b3de6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Dec 2021 19:21:16 +0200
+Subject: net: bridge: mcast: add and enforce startup query interval minimum
+
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+
+[ Upstream commit f83a112bd91a494cdee671aec74e777470fb4a07 ]
+
+As reported[1] if startup query interval is set too low in combination with
+large number of startup queries and we have multiple bridges or even a
+single bridge with multiple querier vlans configured we can crash the
+machine. Add a 1 second minimum which must be enforced by overwriting the
+value if set lower (i.e. without returning an error) to avoid breaking
+user-space. If that happens a log message is emitted to let the admin know
+that the startup interval has been set to the minimum. It doesn't make
+sense to make the startup interval lower than the normal query interval
+so use the same value of 1 second. The issue has been present since these
+intervals could be user-controlled.
+
+[1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/
+
+Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries")
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_multicast.c    | 16 ++++++++++++++++
+ net/bridge/br_netlink.c      |  2 +-
+ net/bridge/br_private.h      |  3 +++
+ net/bridge/br_sysfs_br.c     |  2 +-
+ net/bridge/br_vlan_options.c |  2 +-
+ 5 files changed, 22 insertions(+), 3 deletions(-)
+
+diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
+index 998da4a2d2092..de24098894897 100644
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
+       brmctx->multicast_query_interval = intvl_jiffies;
+ }
++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
++                                        unsigned long val)
++{
++      unsigned long intvl_jiffies = clock_t_to_jiffies(val);
++
++      if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) {
++              br_info(brmctx->br,
++                      "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n",
++                      jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN),
++                      jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN));
++              intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN;
++      }
++
++      brmctx->multicast_startup_query_interval = intvl_jiffies;
++}
++
+ /**
+  * br_multicast_list_adjacent - Returns snooped multicast addresses
+  * @dev:      The bridge port adjacent to which to retrieve addresses
+diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
+index 09812df3bc91d..e365cf82f0615 100644
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
+       if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
+               u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
+-              br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++              br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
+       }
+       if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index 30c9411bfb646..5951e3142fe94 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -29,6 +29,7 @@
+ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+ #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000)
++#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN
+ #define BR_HWDOM_MAX BITS_PER_LONG
+@@ -971,6 +972,8 @@ size_t br_multicast_querier_state_size(void);
+ size_t br_rports_size(const struct net_bridge_mcast *brmctx);
+ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
+                                 unsigned long val);
++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
++                                        unsigned long val);
+ static inline bool br_group_is_l2(const struct br_ip *group)
+ {
+diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
+index f5bd1114a434d..7b0c19772111c 100644
+--- a/net/bridge/br_sysfs_br.c
++++ b/net/bridge/br_sysfs_br.c
+@@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show(
+ static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
+                                     struct netlink_ext_ack *extack)
+ {
+-      br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++      br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
+       return 0;
+ }
+diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
+index bf1ac08742794..a6382973b3e70 100644
+--- a/net/bridge/br_vlan_options.c
++++ b/net/bridge/br_vlan_options.c
+@@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+               u64 val;
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
+-              v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
++              br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch b/queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch
new file mode 100644 (file)
index 0000000..e9ab61d
--- /dev/null
@@ -0,0 +1,43 @@
+From ff9646f81b11c79f524685fab51204ca0f2ec1fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 17:31:42 +0200
+Subject: net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper
+
+From: Nikolay Aleksandrov <nikolay@nvidia.com>
+
+[ Upstream commit 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f ]
+
+We need to first check if the context is a vlan one, then we need to
+check the global bridge multicast vlan snooping flag, and finally the
+vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast
+processing (e.g. querier timers).
+
+Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control")
+Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_private.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index 5951e3142fe94..bd218c2b2cd97 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -1158,9 +1158,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+ static inline bool
+ br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+ {
+-      return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+-             br_multicast_ctx_is_vlan(brmctx) &&
+-             !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
++      return br_multicast_ctx_is_vlan(brmctx) &&
++             (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) ||
++              !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED));
+ }
+ static inline bool
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch b/queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch
new file mode 100644 (file)
index 0000000..f92b7e1
--- /dev/null
@@ -0,0 +1,36 @@
+From e1ccda59b6c9b7cfa873b5a48df06eafb26257ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Dec 2021 17:22:03 +0100
+Subject: net: lantiq_xrx200: fix statistics of received bytes
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit 5be60a945329d82f06fc755a43eeefbfc5f77d72 ]
+
+Received frames have FCS truncated. There is no need
+to subtract FCS length from the statistics.
+
+Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver")
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_xrx200.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
+index fb78f17d734fe..b02f796b5422f 100644
+--- a/drivers/net/ethernet/lantiq_xrx200.c
++++ b/drivers/net/ethernet/lantiq_xrx200.c
+@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
+       skb->protocol = eth_type_trans(skb, net_dev);
+       netif_receive_skb(skb);
+       net_dev->stats.rx_packets++;
+-      net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
++      net_dev->stats.rx_bytes += len;
+       return 0;
+ }
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch b/queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch
new file mode 100644 (file)
index 0000000..1996f16
--- /dev/null
@@ -0,0 +1,47 @@
+From 6a805c457e7dc9698bf878c313367b9e811d14fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Dec 2021 06:54:53 +0000
+Subject: net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 6b8b42585886c59a008015083282aae434349094 ]
+
+The mlx5_get_uars_page() function  returns error pointers.
+Using IS_ERR() to check the return value to fix this.
+
+Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+index 0fe159809ba15..ea1b8ca5bf3aa 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+@@ -2,6 +2,7 @@
+ /* Copyright (c) 2019 Mellanox Technologies. */
+ #include <linux/mlx5/eswitch.h>
++#include <linux/err.h>
+ #include "dr_types.h"
+ #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type)        \
+@@ -75,9 +76,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
+       }
+       dmn->uar = mlx5_get_uars_page(dmn->mdev);
+-      if (!dmn->uar) {
++      if (IS_ERR(dmn->uar)) {
+               mlx5dr_err(dmn, "Couldn't allocate UAR\n");
+-              ret = -ENOMEM;
++              ret = PTR_ERR(dmn->uar);
+               goto clean_pd;
+       }
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch b/queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch
new file mode 100644 (file)
index 0000000..f56df2f
--- /dev/null
@@ -0,0 +1,43 @@
+From 3aebafe5fba999948f7fe6a958d7a0bc5476c69d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Nov 2021 23:10:57 +0200
+Subject: net/mlx5: Fix error print in case of IRQ request failed
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit aa968f922039706f6d13e8870b49e424d0a8d9ad ]
+
+In case IRQ layer failed to find or to request irq, the driver is
+printing the first cpu of the provided affinity as part of the error
+print. Empty affinity is a valid input for the IRQ layer, and it is
+an error to call cpumask_first() on empty affinity.
+
+Remove the first cpu print from the error message.
+
+Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index 763c83a023809..11f3649fdaab1 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -346,8 +346,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
+       new_irq = irq_pool_create_irq(pool, affinity);
+       if (IS_ERR(new_irq)) {
+               if (!least_loaded_irq) {
+-                      mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
+-                                    cpumask_first(affinity));
++                      mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
++                                    PTR_ERR(new_irq));
+                       mutex_unlock(&pool->lock);
+                       return new_irq;
+               }
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch b/queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch
new file mode 100644 (file)
index 0000000..c36f92f
--- /dev/null
@@ -0,0 +1,60 @@
+From 663b1c674df8f0094d7b23fbd8980d509f3da3a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Nov 2021 20:08:13 +0200
+Subject: net/mlx5: Fix SF health recovery flow
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit 33de865f7bce3968676e43b0182af0a2dd359dae ]
+
+SF do not directly control the PCI device. During recovery flow SF
+should not be allowed to do pci disable or pci reset, its PF will do it.
+
+It fixes the following kernel trace:
+mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow
+mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called
+mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations
+mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532
+mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow
+mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called
+mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations
+mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532
+mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed,
+status bad resource state(0x9), syndrome (0x658908)
+mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed
+mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed
+
+Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 92b08fa07efae..92b01858d7f3e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1775,12 +1775,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
+ int mlx5_recover_device(struct mlx5_core_dev *dev)
+ {
+-      int ret = -EIO;
++      if (!mlx5_core_is_sf(dev)) {
++              mlx5_pci_disable_device(dev);
++              if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
++                      return -EIO;
++      }
+-      mlx5_pci_disable_device(dev);
+-      if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
+-              ret = mlx5_load_one(dev);
+-      return ret;
++      return mlx5_load_one(dev);
+ }
+ static struct pci_driver mlx5_core_driver = {
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch b/queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch
new file mode 100644 (file)
index 0000000..4159aaa
--- /dev/null
@@ -0,0 +1,39 @@
+From 023f1b3301047804e171b5ff57102010f8e8ec9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Dec 2021 03:52:53 +0200
+Subject: net/mlx5: Fix tc max supported prio for nic mode
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit d671e109bd8548d067b27e39e183a484430bf102 ]
+
+Only prio 1 is supported if firmware doesn't support ignore flow
+level for nic mode. The offending commit removed the check wrongly.
+Add it back.
+
+Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+index 97e5845b4cfdd..d5e47630e2849 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
+ u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
+ {
++      if (!mlx5_chains_prios_supported(chains))
++              return 1;
++
+       if (mlx5_chains_ignore_flow_level_supported(chains))
+               return UINT_MAX;
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch b/queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch
new file mode 100644 (file)
index 0000000..a516d85
--- /dev/null
@@ -0,0 +1,60 @@
+From c222de48a8bac56cbad50b9d9baf44208f3e6bb3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Dec 2021 11:18:02 +0800
+Subject: net/mlx5e: Delete forward rule for ct or sample action
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 2820110d945923ab2f4901753e4ccbb2a506fa8e ]
+
+When there is ct or sample action, the ct or sample rule will be deleted
+and return. But if there is an extra mirror action, the forward rule can't
+be deleted because of the return.
+
+Fix it by removing the return.
+
+Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring")
+Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++-----------
+ 1 file changed, 6 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index e7736421d1bc2..fa461bc57baee 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1179,21 +1179,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+               goto offload_rule_0;
+-      if (flow_flag_test(flow, CT)) {
+-              mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+-              return;
+-      }
+-
+-      if (flow_flag_test(flow, SAMPLE)) {
+-              mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+-              return;
+-      }
+-
+       if (attr->esw_attr->split_count)
+               mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
++      if (flow_flag_test(flow, CT))
++              mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
++      else if (flow_flag_test(flow, SAMPLE))
++              mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
++      else
+ offload_rule_0:
+-      mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
++              mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+ struct mlx5_flow_handle *
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch b/queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch
new file mode 100644 (file)
index 0000000..44538f1
--- /dev/null
@@ -0,0 +1,126 @@
+From 90287a4b22d88a87325567122dd796214cb82386 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Jul 2020 16:32:44 +0300
+Subject: net/mlx5e: Fix ICOSQ recovery flow for XSK
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 19c4aba2d4e23997061fb11aed8a3e41334bfa14 ]
+
+There are two ICOSQs per channel: one is needed for RX, and the other
+for async operations (XSK TX, kTLS offload). Currently, the recovery
+flow for both is the same, and async ICOSQ is mistakenly treated like
+the regular ICOSQ.
+
+This patch prevents running the regular ICOSQ recovery on async ICOSQ.
+The purpose of async ICOSQ is to handle XSK wakeup requests and post
+kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs,
+so the regular recovery sequence is not applicable here.
+
+Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 --
+ .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++++-----
+ 2 files changed, 22 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index 54757117071db..7204bc86e4741 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -956,9 +956,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
+ void mlx5e_destroy_rq(struct mlx5e_rq *rq);
+ struct mlx5e_sq_param;
+-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+-                   struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+                    struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
+                    struct mlx5e_xdpsq *sq, bool is_redirect);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 611c8a0cbf4f0..685fe77de5ce4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1036,9 +1036,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
+       mlx5e_reporter_icosq_cqe_err(sq);
+ }
++static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
++{
++      struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
++                                            recover_work);
++
++      /* Not implemented yet. */
++
++      netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
++}
++
+ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+                            struct mlx5e_sq_param *param,
+-                           struct mlx5e_icosq *sq)
++                           struct mlx5e_icosq *sq,
++                           work_func_t recover_work_func)
+ {
+       void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
+       struct mlx5_core_dev *mdev = c->mdev;
+@@ -1059,7 +1070,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+       if (err)
+               goto err_sq_wq_destroy;
+-      INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
++      INIT_WORK(&sq->recover_work, recover_work_func);
+       return 0;
+@@ -1397,13 +1408,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+       mlx5e_reporter_tx_err_cqe(sq);
+ }
+-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+-                   struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
++static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
++                          struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
++                          work_func_t recover_work_func)
+ {
+       struct mlx5e_create_sq_param csp = {};
+       int err;
+-      err = mlx5e_alloc_icosq(c, param, sq);
++      err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
+       if (err)
+               return err;
+@@ -1442,7 +1454,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
+       synchronize_net(); /* Sync with NAPI. */
+ }
+-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
++static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+ {
+       struct mlx5e_channel *c = sq->channel;
+@@ -1869,13 +1881,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
+       spin_lock_init(&c->async_icosq_lock);
+-      err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
++      err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
++                             mlx5e_async_icosq_err_cqe_work);
+       if (err)
+               goto err_close_xdpsq_cq;
+       mutex_init(&c->icosq_recovery_lock);
+-      err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
++      err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
++                             mlx5e_icosq_err_cqe_work);
+       if (err)
+               goto err_close_async_icosq;
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch b/queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch
new file mode 100644 (file)
index 0000000..a2714a7
--- /dev/null
@@ -0,0 +1,219 @@
+From 84708c3fb67cacc68517e97e053cfe59c4848699 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Oct 2021 19:40:09 +0300
+Subject: net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit 17958d7cd731b977ae7d4af38d891c3a1235b5f1 ]
+
+Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing
+recovery for the ICOSQ, don't forget to deactivate XSKRQ.
+
+XSK can be opened and closed while channels are active, so a new mutex
+prevents the ICOSQ recovery from running at the same time. The ICOSQ
+recovery deactivates and reactivates XSKRQ, so any parallel change in
+XSK state would break consistency. As the regular RQ is running, it's
+not enough to just flush the recovery work, because it can be
+rescheduled.
+
+Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 ++
+ .../ethernet/mellanox/mlx5/core/en/health.h   |  2 ++
+ .../mellanox/mlx5/core/en/reporter_rx.c       | 35 ++++++++++++++++++-
+ .../mellanox/mlx5/core/en/xsk/setup.c         | 16 ++++++++-
+ .../net/ethernet/mellanox/mlx5/core/en_main.c |  7 ++--
+ 5 files changed, 58 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+index c10a107a3ea53..54757117071db 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -727,6 +727,8 @@ struct mlx5e_channel {
+       DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
+       int                        ix;
+       int                        cpu;
++      /* Sync between icosq recovery and XSK enable/disable. */
++      struct mutex               icosq_recovery_lock;
+ };
+ struct mlx5e_ptp;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+index 018262d0164b3..3aaf3c2752feb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
++void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
+ #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+ #define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+index 0eb125316fe20..e329158fdc555 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+@@ -59,6 +59,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
+ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+ {
++      struct mlx5e_rq *xskrq = NULL;
+       struct mlx5_core_dev *mdev;
+       struct mlx5e_icosq *icosq;
+       struct net_device *dev;
+@@ -67,7 +68,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+       int err;
+       icosq = ctx;
++
++      mutex_lock(&icosq->channel->icosq_recovery_lock);
++
++      /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
+       rq = &icosq->channel->rq;
++      if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
++              xskrq = &icosq->channel->xskrq;
+       mdev = icosq->channel->mdev;
+       dev = icosq->channel->netdev;
+       err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
+@@ -81,6 +88,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+               goto out;
+       mlx5e_deactivate_rq(rq);
++      if (xskrq)
++              mlx5e_deactivate_rq(xskrq);
++
+       err = mlx5e_wait_for_icosq_flush(icosq);
+       if (err)
+               goto out;
+@@ -94,15 +104,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
+               goto out;
+       mlx5e_reset_icosq_cc_pc(icosq);
++
+       mlx5e_free_rx_in_progress_descs(rq);
++      if (xskrq)
++              mlx5e_free_rx_in_progress_descs(xskrq);
++
+       clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+       mlx5e_activate_icosq(icosq);
+-      mlx5e_activate_rq(rq);
++      mlx5e_activate_rq(rq);
+       rq->stats->recover++;
++
++      if (xskrq) {
++              mlx5e_activate_rq(xskrq);
++              xskrq->stats->recover++;
++      }
++
++      mutex_unlock(&icosq->channel->icosq_recovery_lock);
++
+       return 0;
+ out:
+       clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
++      mutex_unlock(&icosq->channel->icosq_recovery_lock);
+       return err;
+ }
+@@ -703,6 +726,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
+       mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+ }
++void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
++{
++      mutex_lock(&c->icosq_recovery_lock);
++}
++
++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
++{
++      mutex_unlock(&c->icosq_recovery_lock);
++}
++
+ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
+       .name = "rx",
+       .recover = mlx5e_rx_reporter_recover,
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+index 538bc2419bd83..8526a5fbbf0bf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+@@ -4,6 +4,7 @@
+ #include "setup.h"
+ #include "en/params.h"
+ #include "en/txrx.h"
++#include "en/health.h"
+ /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
+  * change unexpectedly, and mlx5e has a minimum valid stride size for striding
+@@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
+ void mlx5e_activate_xsk(struct mlx5e_channel *c)
+ {
++      /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
++       * activating XSKRQ in the middle of recovery.
++       */
++      mlx5e_reporter_icosq_suspend_recovery(c);
+       set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
++      mlx5e_reporter_icosq_resume_recovery(c);
++
+       /* TX queue is created active. */
+       spin_lock_bh(&c->async_icosq_lock);
+@@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
+ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+ {
+-      mlx5e_deactivate_rq(&c->xskrq);
++      /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
++       * middle of recovery. Suspend the recovery to avoid it.
++       */
++      mlx5e_reporter_icosq_suspend_recovery(c);
++      clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
++      mlx5e_reporter_icosq_resume_recovery(c);
++      synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
++
+       /* TX queue is disabled on close. */
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 8cf5fbebd674b..611c8a0cbf4f0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -911,8 +911,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+ void mlx5e_close_rq(struct mlx5e_rq *rq)
+ {
+       cancel_work_sync(&rq->dim.work);
+-      if (rq->icosq)
+-              cancel_work_sync(&rq->icosq->recover_work);
+       cancel_work_sync(&rq->recover_work);
+       mlx5e_destroy_rq(rq);
+       mlx5e_free_rx_descs(rq);
+@@ -1875,6 +1873,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
+       if (err)
+               goto err_close_xdpsq_cq;
++      mutex_init(&c->icosq_recovery_lock);
++
+       err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+       if (err)
+               goto err_close_async_icosq;
+@@ -1943,9 +1943,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
+       mlx5e_close_xdpsq(&c->xdpsq);
+       if (c->xdp)
+               mlx5e_close_xdpsq(&c->rq_xdpsq);
++      /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
++      cancel_work_sync(&c->icosq.recover_work);
+       mlx5e_close_rq(&c->rq);
+       mlx5e_close_sqs(c);
+       mlx5e_close_icosq(&c->icosq);
++      mutex_destroy(&c->icosq_recovery_lock);
+       mlx5e_close_icosq(&c->async_icosq);
+       if (c->xdp)
+               mlx5e_close_cq(&c->rq_xdpsq.cq);
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch b/queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch
new file mode 100644 (file)
index 0000000..eddd204
--- /dev/null
@@ -0,0 +1,86 @@
+From 28d18fe129915b757906f23fc095ab72b1903856 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 11:08:41 +0200
+Subject: net/mlx5e: Fix wrong features assignment in case of error
+
+From: Gal Pressman <gal@nvidia.com>
+
+[ Upstream commit 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 ]
+
+In case of an error in mlx5e_set_features(), 'netdev->features' must be
+updated with the correct state of the device to indicate which features
+were updated successfully.
+To do that we maintain a copy of 'netdev->features' and update it after
+successful feature changes, so we can assign it to back to
+'netdev->features' if needed.
+
+However, since not all netdev features are handled by the driver (e.g.
+GRO/TSO/etc), some features may not be updated correctly in case of an
+error updating another feature.
+
+For example, while requesting to disable TSO (feature which is not
+handled by the driver) and enable HW-GRO, if an error occurs during
+HW-GRO enable, 'oper_features' will be assigned with 'netdev->features'
+and HW-GRO turned off. TSO will remain enabled in such case, which is a
+bug.
+
+To solve that, instead of using 'netdev->features' as the baseline of
+'oper_features' and changing it on set feature success, use 'features'
+instead and update it in case of errors.
+
+Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow")
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 685fe77de5ce4..baa0d7d48fc0c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3450,12 +3450,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
+ static int mlx5e_handle_feature(struct net_device *netdev,
+                               netdev_features_t *features,
+-                              netdev_features_t wanted_features,
+                               netdev_features_t feature,
+                               mlx5e_feature_handler feature_handler)
+ {
+-      netdev_features_t changes = wanted_features ^ netdev->features;
+-      bool enable = !!(wanted_features & feature);
++      netdev_features_t changes = *features ^ netdev->features;
++      bool enable = !!(*features & feature);
+       int err;
+       if (!(changes & feature))
+@@ -3463,22 +3462,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
+       err = feature_handler(netdev, enable);
+       if (err) {
++              MLX5E_SET_FEATURE(features, feature, !enable);
+               netdev_err(netdev, "%s feature %pNF failed, err %d\n",
+                          enable ? "Enable" : "Disable", &feature, err);
+               return err;
+       }
+-      MLX5E_SET_FEATURE(features, feature, enable);
+       return 0;
+ }
+ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
+ {
+-      netdev_features_t oper_features = netdev->features;
++      netdev_features_t oper_features = features;
+       int err = 0;
+ #define MLX5E_HANDLE_FEATURE(feature, handler) \
+-      mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
++      mlx5e_handle_feature(netdev, &oper_features, feature, handler)
+       err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+       err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch b/queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch
new file mode 100644 (file)
index 0000000..bfe4e90
--- /dev/null
@@ -0,0 +1,157 @@
+From e35babfd02149aabcf23a23de8b0e1a88219fd42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Aug 2021 13:33:17 +0300
+Subject: net/mlx5e: Use tc sample stubs instead of ifdefs in source file
+
+From: Roi Dayan <roid@nvidia.com>
+
+[ Upstream commit f3e02e479debb37777696c9f984f75152beeb56d ]
+
+Instead of having sparse ifdefs in source files use a single
+ifdef in the tc sample header file and use stubs.
+
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  2 --
+ .../mellanox/mlx5/core/en/tc/sample.h         | 27 +++++++++++++++++++
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 12 ---------
+ 3 files changed, 27 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+index de03684528bbf..8451940c16ab9 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+@@ -647,9 +647,7 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk
+                          "Failed to restore tunnel info for sampled packet\n");
+               return;
+       }
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       mlx5e_tc_sample_skb(skb, mapped_obj);
+-#endif /* CONFIG_MLX5_TC_SAMPLE */
+       mlx5_rep_tc_post_napi_receive(tc_priv);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+index db0146df9b303..9ef8a49d78014 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+@@ -19,6 +19,8 @@ struct mlx5e_sample_attr {
+       struct mlx5e_sample_flow *sample_flow;
+ };
++#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
++
+ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+ struct mlx5_flow_handle *
+@@ -38,4 +40,29 @@ mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+ void
+ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
++#else /* CONFIG_MLX5_TC_SAMPLE */
++
++static inline struct mlx5_flow_handle *
++mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
++                      struct mlx5_flow_spec *spec,
++                      struct mlx5_flow_attr *attr,
++                      u32 tunnel_id)
++{ return ERR_PTR(-EOPNOTSUPP); }
++
++static inline void
++mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
++                        struct mlx5_flow_handle *rule,
++                        struct mlx5_flow_attr *attr) {}
++
++static inline struct mlx5e_tc_psample *
++mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
++{ return ERR_PTR(-EOPNOTSUPP); }
++
++static inline void
++mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
++
++static inline void
++mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
++
++#endif /* CONFIG_MLX5_TC_SAMPLE */
+ #endif /* __MLX5_EN_TC_SAMPLE_H__ */
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index e3b320b6d85b9..e7736421d1bc2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -248,7 +248,6 @@ get_ct_priv(struct mlx5e_priv *priv)
+       return priv->fs.tc.ct;
+ }
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ static struct mlx5e_tc_psample *
+ get_sample_priv(struct mlx5e_priv *priv)
+ {
+@@ -265,7 +264,6 @@ get_sample_priv(struct mlx5e_priv *priv)
+       return NULL;
+ }
+-#endif
+ struct mlx5_flow_handle *
+ mlx5_tc_rule_insert(struct mlx5e_priv *priv,
+@@ -1148,11 +1146,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+               rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
+                                              flow, spec, attr,
+                                              mod_hdr_acts);
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       } else if (flow_flag_test(flow, SAMPLE)) {
+               rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
+                                              mlx5e_tc_get_flow_tun_id(flow));
+-#endif
+       } else {
+               rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+       }
+@@ -1188,12 +1184,10 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+               return;
+       }
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       if (flow_flag_test(flow, SAMPLE)) {
+               mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+               return;
+       }
+-#endif
+       if (attr->esw_attr->split_count)
+               mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+@@ -5014,9 +5008,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+                                              MLX5_FLOW_NAMESPACE_FDB,
+                                              uplink_priv->post_act);
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
+-#endif
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+@@ -5060,9 +5052,7 @@ err_ht_init:
+ err_enc_opts_mapping:
+       mapping_destroy(uplink_priv->tunnel_mapping);
+ err_tun_mapping:
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+-#endif
+       mlx5_tc_ct_clean(uplink_priv->ct_priv);
+       netdev_warn(priv->netdev,
+                   "Failed to initialize tc (eswitch), err: %d", err);
+@@ -5082,9 +5072,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+       mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+       mapping_destroy(uplink_priv->tunnel_mapping);
+-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+-#endif
+       mlx5_tc_ct_clean(uplink_priv->ct_priv);
+       mlx5e_tc_post_act_destroy(uplink_priv->post_act);
+ }
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch b/queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch
new file mode 100644 (file)
index 0000000..f6c3f09
--- /dev/null
@@ -0,0 +1,92 @@
+From e09777ed4c1e36dadf6cd8ea95a2f4f1e4612f3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Nov 2021 16:05:44 +0200
+Subject: net/mlx5e: Wrap the tx reporter dump callback to extract the sq
+
+From: Amir Tzin <amirtz@nvidia.com>
+
+[ Upstream commit 918fc3855a6507a200e9cf22c20be852c0982687 ]
+
+Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct
+mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually
+of type struct mlx5e_tx_timeout_ctx *.
+
+ mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected
+ mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000
+ BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae)
+ kernel stack overflow (page fault): 0000 [#1] SMP NOPTI
+ CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core]
+ RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180
+ [mlx5_core]
+ Call Trace:
+ mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core]
+ devlink_health_do_dump.part.91+0x71/0xd0
+ devlink_health_report+0x157/0x1b0
+ mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core]
+ ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0
+ [mlx5_core]
+ ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core]
+ ? update_load_avg+0x19b/0x550
+ ? set_next_entity+0x72/0x80
+ ? pick_next_task_fair+0x227/0x340
+ ? finish_task_switch+0xa2/0x280
+   mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core]
+   process_one_work+0x1de/0x3a0
+   worker_thread+0x2d/0x3c0
+ ? process_one_work+0x3a0/0x3a0
+   kthread+0x115/0x130
+ ? kthread_park+0x90/0x90
+   ret_from_fork+0x1f/0x30
+ --[ end trace 51ccabea504edaff ]---
+ RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180
+ PKRU: 55555554
+ Kernel panic - not syncing: Fatal exception
+ Kernel Offset: disabled
+ end Kernel panic - not syncing: Fatal exception
+
+To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which
+extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the
+TX-timeout-recovery flow dump callback.
+
+Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Amir Tzin <amirtz@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c   | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+index bb682fd751c98..8024599994642 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+@@ -463,6 +463,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
+       return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ }
++static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
++                                        void *ctx)
++{
++      struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
++
++      return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
++}
++
+ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+                                         struct devlink_fmsg *fmsg)
+ {
+@@ -558,7 +566,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
+       to_ctx.sq = sq;
+       err_ctx.ctx = &to_ctx;
+       err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+-      err_ctx.dump = mlx5e_tx_reporter_dump_sq;
++      err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
+       snprintf(err_str, sizeof(err_str),
+                "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
+                sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch b/queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch
new file mode 100644 (file)
index 0000000..947a7cd
--- /dev/null
@@ -0,0 +1,45 @@
+From 02683fd674fedc945cc702a20935fabbb4240e2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Dec 2021 11:21:18 +0800
+Subject: net/ncsi: check for error return from call to nla_put_u32
+
+From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+
+[ Upstream commit 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 ]
+
+As we can see from the comment of the nla_put() that it could return
+-EMSGSIZE if the tailroom of the skb is insufficient.
+Therefore, it should be better to check the return value of the
+nla_put_u32 and return the error code if an error occurs.
+Also, there are many other functions have the same problem, and if this
+patch is correct, I will commit a new version to fix all.
+
+Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family")
+Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ncsi/ncsi-netlink.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
+index bb5f1650f11cb..c189b4c8a1823 100644
+--- a/net/ncsi/ncsi-netlink.c
++++ b/net/ncsi/ncsi-netlink.c
+@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb,
+               pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
+               if (!pnest)
+                       return -ENOMEM;
+-              nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
++              rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
++              if (rc) {
++                      nla_nest_cancel(skb, pnest);
++                      return rc;
++              }
+               if ((0x1 << np->id) == ndp->package_whitelist)
+                       nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+               cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch b/queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch
new file mode 100644 (file)
index 0000000..0dab7ec
--- /dev/null
@@ -0,0 +1,40 @@
+From 818c9aa9da0404a2c39402486f8369462b84d392 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Dec 2021 02:14:59 +0000
+Subject: net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in
+ __fixed_phy_register
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit b45396afa4177f2b1ddfeff7185da733fade1dc3 ]
+
+The fixed_phy_get_gpiod function() returns NULL, it doesn't return error
+pointers, using NULL checking to fix this.
+
+Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/fixed_phy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
+index c65fb5f5d2dc5..a0c256bd54417 100644
+--- a/drivers/net/phy/fixed_phy.c
++++ b/drivers/net/phy/fixed_phy.c
+@@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq,
+       /* Check if we have a GPIO associated with this fixed phy */
+       if (!gpiod) {
+               gpiod = fixed_phy_get_gpiod(np);
+-              if (IS_ERR(gpiod))
+-                      return ERR_CAST(gpiod);
++              if (!gpiod)
++                      return ERR_PTR(-EINVAL);
+       }
+       /* Get the next available PHY address, up to PHY_MAX_ADDR */
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch b/queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch
new file mode 100644 (file)
index 0000000..a3d5fc0
--- /dev/null
@@ -0,0 +1,120 @@
+From 56ab6da51734dd0dfa422124402931c737c42741 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 17:03:24 +0800
+Subject: net/smc: don't send CDC/LLC message if link not ready
+
+From: Dust Li <dust.li@linux.alibaba.com>
+
+[ Upstream commit 90cee52f2e780345d3629e278291aea5ac74f40f ]
+
+We found smc_llc_send_link_delete_all() sometimes wait
+for 2s timeout when testing with RDMA link up/down.
+It is possible when a smc_link is in ACTIVATING state,
+the underlaying QP is still in RESET or RTR state, which
+cannot send any messages out.
+
+smc_llc_send_link_delete_all() use smc_link_usable() to
+checks whether the link is usable, if the QP is still in
+RESET or RTR state, but the smc_link is in ACTIVATING, this
+LLC message will always fail without any CQE entering the
+CQ, and we will always wait 2s before timeout.
+
+Since we cannot send any messages through the QP before
+the QP enter RTS. I add a wrapper smc_link_sendable()
+which checks the state of QP along with the link state.
+And replace smc_link_usable() with smc_link_sendable()
+in all LLC & CDC message sending routine.
+
+Fixes: 5f08318f617b ("smc: connection data control (CDC)")
+Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/smc_core.c | 2 +-
+ net/smc/smc_core.h | 6 ++++++
+ net/smc/smc_llc.c  | 2 +-
+ net/smc/smc_wr.c   | 4 ++--
+ net/smc/smc_wr.h   | 2 +-
+ 5 files changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
+index 5a9c22ee75fa4..cb06568cf422f 100644
+--- a/net/smc/smc_core.c
++++ b/net/smc/smc_core.c
+@@ -604,7 +604,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+               struct smc_link *lnk = &lgr->lnk[i];
+-              if (smc_link_usable(lnk))
++              if (smc_link_sendable(lnk))
+                       lnk->state = SMC_LNK_INACTIVE;
+       }
+       wake_up_all(&lgr->llc_msg_waiter);
+diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
+index c043ecdca5c44..51a3e8248ade2 100644
+--- a/net/smc/smc_core.h
++++ b/net/smc/smc_core.h
+@@ -366,6 +366,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
+       return true;
+ }
++static inline bool smc_link_sendable(struct smc_link *lnk)
++{
++      return smc_link_usable(lnk) &&
++              lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
++}
++
+ static inline bool smc_link_active(struct smc_link *lnk)
+ {
+       return lnk->state == SMC_LNK_ACTIVE;
+diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
+index f1d323439a2af..ee1f0fdba0855 100644
+--- a/net/smc/smc_llc.c
++++ b/net/smc/smc_llc.c
+@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
+       delllc.reason = htonl(rsn);
+       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+-              if (!smc_link_usable(&lgr->lnk[i]))
++              if (!smc_link_sendable(&lgr->lnk[i]))
+                       continue;
+               if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
+                       break;
+diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
+index c9cd7a4c5acfc..fcc1942001760 100644
+--- a/net/smc/smc_wr.c
++++ b/net/smc/smc_wr.c
+@@ -169,7 +169,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
+ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
+ {
+       *idx = link->wr_tx_cnt;
+-      if (!smc_link_usable(link))
++      if (!smc_link_sendable(link))
+               return -ENOLINK;
+       for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
+               if (!test_and_set_bit(*idx, link->wr_tx_mask))
+@@ -212,7 +212,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
+       } else {
+               rc = wait_event_interruptible_timeout(
+                       link->wr_tx_wait,
+-                      !smc_link_usable(link) ||
++                      !smc_link_sendable(link) ||
+                       lgr->terminating ||
+                       (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
+                       SMC_WR_TX_WAIT_FREE_SLOT_TIME);
+diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
+index 2bc626f230a56..102d515757ee2 100644
+--- a/net/smc/smc_wr.h
++++ b/net/smc/smc_wr.h
+@@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
+ static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+ {
+-      if (!smc_link_usable(link))
++      if (!smc_link_sendable(link))
+               return false;
+       atomic_inc(&link->wr_tx_refcnt);
+       return true;
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch b/queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch
new file mode 100644 (file)
index 0000000..6b3365e
--- /dev/null
@@ -0,0 +1,437 @@
+From 29c300dc7e5e909bdd0d944d1524dd615de05c67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 17:03:25 +0800
+Subject: net/smc: fix kernel panic caused by race of smc_sock
+
+From: Dust Li <dust.li@linux.alibaba.com>
+
+[ Upstream commit 349d43127dac00c15231e8ffbcaabd70f7b0e544 ]
+
+A crash occurs when smc_cdc_tx_handler() tries to access smc_sock
+but smc_release() has already freed it.
+
+[ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88
+[ 4570.696048] #PF: supervisor write access in kernel mode
+[ 4570.696728] #PF: error_code(0x0002) - not-present page
+[ 4570.697401] PGD 0 P4D 0
+[ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI
+[ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111
+[ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0
+[ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30
+<...>
+[ 4570.711446] Call Trace:
+[ 4570.711746]  <IRQ>
+[ 4570.711992]  smc_cdc_tx_handler+0x41/0xc0
+[ 4570.712470]  smc_wr_tx_tasklet_fn+0x213/0x560
+[ 4570.712981]  ? smc_cdc_tx_dismisser+0x10/0x10
+[ 4570.713489]  tasklet_action_common.isra.17+0x66/0x140
+[ 4570.714083]  __do_softirq+0x123/0x2f4
+[ 4570.714521]  irq_exit_rcu+0xc4/0xf0
+[ 4570.714934]  common_interrupt+0xba/0xe0
+
+Though smc_cdc_tx_handler() checked the existence of smc connection,
+smc_release() may have already dismissed and released the smc socket
+before smc_cdc_tx_handler() further visits it.
+
+smc_cdc_tx_handler()           |smc_release()
+if (!conn)                     |
+                               |
+                               |smc_cdc_tx_dismiss_slots()
+                               |      smc_cdc_tx_dismisser()
+                               |
+                               |sock_put(&smc->sk) <- last sock_put,
+                               |                      smc_sock freed
+bh_lock_sock(&smc->sk) (panic) |
+
+To make sure we won't receive any CDC messages after we free the
+smc_sock, add a refcount on the smc_connection for inflight CDC
+message(posted to the QP but haven't received related CQE), and
+don't release the smc_connection until all the inflight CDC messages
+have been done, for both successful and failed ones.
+
+Using refcount on CDC messages brings another problem: when the link
+is going to be destroyed, smcr_link_clear() will reset the QP, which
+then remove all the pending CQEs related to the QP in the CQ. To make
+sure all the CQEs will always come back so the refcount on the
+smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced
+by smc_ib_modify_qp_error().
+And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we
+need to wait for all pending WQEs done, or we may encounter use-after-
+free when handling CQEs.
+
+For IB device removal routine, we need to wait for all the QPs on that
+device been destroyed before we can destroy CQs on the device, or
+the refcount on smc_connection won't reach 0 and smc_sock cannot be
+released.
+
+Fixes: 5f08318f617b ("smc: connection data control (CDC)")
+Reported-by: Wen Gu <guwen@linux.alibaba.com>
+Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/smc.h      |  5 +++++
+ net/smc/smc_cdc.c  | 52 +++++++++++++++++++++-------------------------
+ net/smc/smc_cdc.h  |  2 +-
+ net/smc/smc_core.c | 25 +++++++++++++++++-----
+ net/smc/smc_ib.c   |  4 ++--
+ net/smc/smc_ib.h   |  1 +
+ net/smc/smc_wr.c   | 41 +++---------------------------------
+ net/smc/smc_wr.h   |  3 +--
+ 8 files changed, 57 insertions(+), 76 deletions(-)
+
+diff --git a/net/smc/smc.h b/net/smc/smc.h
+index d65e15f0c944c..e6919fe31617b 100644
+--- a/net/smc/smc.h
++++ b/net/smc/smc.h
+@@ -170,6 +170,11 @@ struct smc_connection {
+       u16                     tx_cdc_seq;     /* sequence # for CDC send */
+       u16                     tx_cdc_seq_fin; /* sequence # - tx completed */
+       spinlock_t              send_lock;      /* protect wr_sends */
++      atomic_t                cdc_pend_tx_wr; /* number of pending tx CDC wqe
++                                               * - inc when post wqe,
++                                               * - dec on polled tx cqe
++                                               */
++      wait_queue_head_t       cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
+       struct delayed_work     tx_work;        /* retry of smc_cdc_msg_send */
+       u32                     tx_off;         /* base offset in peer rmb */
+diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
+index 99acd337ba90d..84c8a4374fddd 100644
+--- a/net/smc/smc_cdc.c
++++ b/net/smc/smc_cdc.c
+@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
+       struct smc_sock *smc;
+       int diff;
+-      if (!conn)
+-              /* already dismissed */
+-              return;
+-
+       smc = container_of(conn, struct smc_sock, conn);
+       bh_lock_sock(&smc->sk);
+       if (!wc_status) {
+@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
+                             conn);
+               conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
+       }
++
++      if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
++          unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
++              wake_up(&conn->cdc_pend_tx_wq);
++      WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
++
+       smc_tx_sndbuf_nonfull(smc);
+       bh_unlock_sock(&smc->sk);
+ }
+@@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
+       conn->tx_cdc_seq++;
+       conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
+       smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
++
++      atomic_inc(&conn->cdc_pend_tx_wr);
++      smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
++
+       rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+       if (!rc) {
+               smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
+@@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
+       } else {
+               conn->tx_cdc_seq--;
+               conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
++              atomic_dec(&conn->cdc_pend_tx_wr);
+       }
+       return rc;
+@@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+       peer->token = htonl(local->token);
+       peer->prod_flags.failover_validation = 1;
++      /* We need to set pend->conn here to make sure smc_cdc_tx_handler()
++       * can handle properly
++       */
++      smc_cdc_add_pending_send(conn, pend);
++
++      atomic_inc(&conn->cdc_pend_tx_wr);
++      smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
++
+       rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
++      if (unlikely(rc))
++              atomic_dec(&conn->cdc_pend_tx_wr);
++
+       return rc;
+ }
+@@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+       return rc;
+ }
+-static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
+-                            unsigned long data)
++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn)
+ {
+-      struct smc_connection *conn = (struct smc_connection *)data;
+-      struct smc_cdc_tx_pend *cdc_pend =
+-              (struct smc_cdc_tx_pend *)tx_pend;
+-
+-      return cdc_pend->conn == conn;
+-}
+-
+-static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
+-{
+-      struct smc_cdc_tx_pend *cdc_pend =
+-              (struct smc_cdc_tx_pend *)tx_pend;
+-
+-      cdc_pend->conn = NULL;
+-}
+-
+-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
+-{
+-      struct smc_link *link = conn->lnk;
+-
+-      smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
+-                              smc_cdc_tx_filter, smc_cdc_tx_dismisser,
+-                              (unsigned long)conn);
++      wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr));
+ }
+ /* Send a SMC-D CDC header.
+diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
+index 0a0a89abd38b2..696cc11f2303b 100644
+--- a/net/smc/smc_cdc.h
++++ b/net/smc/smc_cdc.h
+@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
+                         struct smc_wr_buf **wr_buf,
+                         struct smc_rdma_wr **wr_rdma_buf,
+                         struct smc_cdc_tx_pend **pend);
+-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn);
+ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
+                    struct smc_cdc_tx_pend *pend);
+ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
+index cb06568cf422f..506b8498623b0 100644
+--- a/net/smc/smc_core.c
++++ b/net/smc/smc_core.c
+@@ -1056,7 +1056,7 @@ void smc_conn_free(struct smc_connection *conn)
+                       smc_ism_unset_conn(conn);
+               tasklet_kill(&conn->rx_tsklet);
+       } else {
+-              smc_cdc_tx_dismiss_slots(conn);
++              smc_cdc_wait_pend_tx_wr(conn);
+               if (current_work() != &conn->abort_work)
+                       cancel_work_sync(&conn->abort_work);
+       }
+@@ -1133,7 +1133,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
+       smc_llc_link_clear(lnk, log);
+       smcr_buf_unmap_lgr(lnk);
+       smcr_rtoken_clear_link(lnk);
+-      smc_ib_modify_qp_reset(lnk);
++      smc_ib_modify_qp_error(lnk);
+       smc_wr_free_link(lnk);
+       smc_ib_destroy_queue_pair(lnk);
+       smc_ib_dealloc_protection_domain(lnk);
+@@ -1264,7 +1264,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
+               else
+                       tasklet_unlock_wait(&conn->rx_tsklet);
+       } else {
+-              smc_cdc_tx_dismiss_slots(conn);
++              smc_cdc_wait_pend_tx_wr(conn);
+       }
+       smc_lgr_unregister_conn(conn);
+       smc_close_active_abort(smc);
+@@ -1387,11 +1387,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
+ /* Called when an SMCR device is removed or the smc module is unloaded.
+  * If smcibdev is given, all SMCR link groups using this device are terminated.
+  * If smcibdev is NULL, all SMCR link groups are terminated.
++ *
++ * We must wait here for QPs been destroyed before we destroy the CQs,
++ * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
++ * smc_sock cannot be released.
+  */
+ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
+ {
+       struct smc_link_group *lgr, *lg;
+       LIST_HEAD(lgr_free_list);
++      LIST_HEAD(lgr_linkdown_list);
+       int i;
+       spin_lock_bh(&smc_lgr_list.lock);
+@@ -1403,7 +1408,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
+               list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
+                       for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+                               if (lgr->lnk[i].smcibdev == smcibdev)
+-                                      smcr_link_down_cond_sched(&lgr->lnk[i]);
++                                      list_move_tail(&lgr->list, &lgr_linkdown_list);
+                       }
+               }
+       }
+@@ -1415,6 +1420,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
+               __smc_lgr_terminate(lgr, false);
+       }
++      list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
++              for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
++                      if (lgr->lnk[i].smcibdev == smcibdev) {
++                              mutex_lock(&lgr->llc_conf_mutex);
++                              smcr_link_down_cond(&lgr->lnk[i]);
++                              mutex_unlock(&lgr->llc_conf_mutex);
++                      }
++              }
++      }
++
+       if (smcibdev) {
+               if (atomic_read(&smcibdev->lnk_cnt))
+                       wait_event(smcibdev->lnks_deleted,
+@@ -1514,7 +1529,6 @@ static void smcr_link_down(struct smc_link *lnk)
+       if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+               return;
+-      smc_ib_modify_qp_reset(lnk);
+       to_lnk = smc_switch_conns(lgr, lnk, true);
+       if (!to_lnk) { /* no backup link available */
+               smcr_link_clear(lnk, true);
+@@ -1742,6 +1756,7 @@ create:
+       conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
+       conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
+       conn->urg_state = SMC_URG_READ;
++      init_waitqueue_head(&conn->cdc_pend_tx_wq);
+       INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
+       if (ini->is_smcd) {
+               conn->rx_off = sizeof(struct smcd_cdc_msg);
+diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
+index a8845343d183e..f0ec1f1d50fac 100644
+--- a/net/smc/smc_ib.c
++++ b/net/smc/smc_ib.c
+@@ -101,12 +101,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk)
+                           IB_QP_MAX_QP_RD_ATOMIC);
+ }
+-int smc_ib_modify_qp_reset(struct smc_link *lnk)
++int smc_ib_modify_qp_error(struct smc_link *lnk)
+ {
+       struct ib_qp_attr qp_attr;
+       memset(&qp_attr, 0, sizeof(qp_attr));
+-      qp_attr.qp_state = IB_QPS_RESET;
++      qp_attr.qp_state = IB_QPS_ERR;
+       return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
+ }
+diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
+index 3085f5180da79..6967c3d52b03e 100644
+--- a/net/smc/smc_ib.h
++++ b/net/smc/smc_ib.h
+@@ -79,6 +79,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk);
+ int smc_ib_ready_link(struct smc_link *lnk);
+ int smc_ib_modify_qp_rts(struct smc_link *lnk);
+ int smc_ib_modify_qp_reset(struct smc_link *lnk);
++int smc_ib_modify_qp_error(struct smc_link *lnk);
+ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
+ int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+                            struct smc_buf_desc *buf_slot, u8 link_idx);
+diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
+index fcc1942001760..59ca1a2d5c650 100644
+--- a/net/smc/smc_wr.c
++++ b/net/smc/smc_wr.c
+@@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
+ }
+ /* wait till all pending tx work requests on the given link are completed */
+-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+ {
+-      if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
+-                             SMC_WR_TX_WAIT_PENDING_TIME))
+-              return 0;
+-      else /* timeout */
+-              return -EPIPE;
++      wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
+ }
+ static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
+@@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
+       struct smc_wr_tx_pend pnd_snd;
+       struct smc_link *link;
+       u32 pnd_snd_idx;
+-      int i;
+       link = wc->qp->qp_context;
+@@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
+       if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
+               return;
+       if (wc->status) {
+-              for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+-                      /* clear full struct smc_wr_tx_pend including .priv */
+-                      memset(&link->wr_tx_pends[i], 0,
+-                             sizeof(link->wr_tx_pends[i]));
+-                      memset(&link->wr_tx_bufs[i], 0,
+-                             sizeof(link->wr_tx_bufs[i]));
+-                      clear_bit(i, link->wr_tx_mask);
+-              }
+               /* terminate link */
+               smcr_link_down_cond_sched(link);
+       }
+@@ -351,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+       return rc;
+ }
+-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
+-                           smc_wr_tx_filter filter,
+-                           smc_wr_tx_dismisser dismisser,
+-                           unsigned long data)
+-{
+-      struct smc_wr_tx_pend_priv *tx_pend;
+-      struct smc_wr_rx_hdr *wr_tx;
+-      int i;
+-
+-      for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+-              wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+-              if (wr_tx->type != wr_tx_hdr_type)
+-                      continue;
+-              tx_pend = &link->wr_tx_pends[i].priv;
+-              if (filter(tx_pend, data))
+-                      dismisser(tx_pend);
+-      }
+-}
+-
+ /****************************** receive queue ********************************/
+ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
+@@ -574,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk)
+       smc_wr_wakeup_reg_wait(lnk);
+       smc_wr_wakeup_tx_wait(lnk);
+-      if (smc_wr_tx_wait_no_pending_sends(lnk))
+-              memset(lnk->wr_tx_mask, 0,
+-                     BITS_TO_LONGS(SMC_WR_BUF_CNT) *
+-                                              sizeof(*lnk->wr_tx_mask));
++      smc_wr_tx_wait_no_pending_sends(lnk);
+       wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+       wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
+diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
+index 102d515757ee2..cb58e60078f57 100644
+--- a/net/smc/smc_wr.h
++++ b/net/smc/smc_wr.h
+@@ -22,7 +22,6 @@
+ #define SMC_WR_BUF_CNT 16     /* # of ctrl buffers per link */
+ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
+-#define SMC_WR_TX_WAIT_PENDING_TIME   (5 * HZ)
+ #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */
+@@ -122,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
+                            smc_wr_tx_filter filter,
+                            smc_wr_tx_dismisser dismisser,
+                            unsigned long data);
+-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
+ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
+ int smc_wr_rx_post_init(struct smc_link *link);
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch b/queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch
new file mode 100644 (file)
index 0000000..024b406
--- /dev/null
@@ -0,0 +1,54 @@
+From b65035dd9568a56faab05b6267f5ca21aba336e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Dec 2021 14:35:30 +0100
+Subject: net/smc: fix using of uninitialized completions
+
+From: Karsten Graul <kgraul@linux.ibm.com>
+
+[ Upstream commit 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 ]
+
+In smc_wr_tx_send_wait() the completion on index specified by
+pend->idx is initialized and after smc_wr_tx_send() was called the wait
+for completion starts. pend->idx is used to get the correct index for
+the wait, but the pend structure could already be cleared in
+smc_wr_tx_process_cqe().
+Introduce pnd_idx to hold and use a local copy of the correct index.
+
+Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message")
+Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/smc_wr.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
+index a419e9af36b98..c9cd7a4c5acfc 100644
+--- a/net/smc/smc_wr.c
++++ b/net/smc/smc_wr.c
+@@ -288,18 +288,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+                       unsigned long timeout)
+ {
+       struct smc_wr_tx_pend *pend;
++      u32 pnd_idx;
+       int rc;
+       pend = container_of(priv, struct smc_wr_tx_pend, priv);
+       pend->compl_requested = 1;
+-      init_completion(&link->wr_tx_compl[pend->idx]);
++      pnd_idx = pend->idx;
++      init_completion(&link->wr_tx_compl[pnd_idx]);
+       rc = smc_wr_tx_send(link, priv);
+       if (rc)
+               return rc;
+       /* wait for completion by smc_wr_tx_process_cqe() */
+       rc = wait_for_completion_interruptible_timeout(
+-                                      &link->wr_tx_compl[pend->idx], timeout);
++                                      &link->wr_tx_compl[pnd_idx], timeout);
+       if (rc <= 0)
+               rc = -ENODATA;
+       if (rc > 0)
+-- 
+2.34.1
+
diff --git a/queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch b/queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch
new file mode 100644 (file)
index 0000000..1cdb85f
--- /dev/null
@@ -0,0 +1,63 @@
+From 91eb5db8ed2352072ed153fbb42555333251d059 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Dec 2021 23:12:08 +0100
+Subject: net: usb: pegasus: Do not drop long Ethernet frames
+
+From: Matthias-Christian Ott <ott@mirix.org>
+
+[ Upstream commit ca506fca461b260ab32952b610c3d4aadc6c11fd ]
+
+The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames
+that are longer than 1518 octets, for example, Ethernet frames that
+contain 802.1Q VLAN tags.
+
+The frames are sent to the pegasus driver via USB but the driver
+discards them because they have the Long_pkt field set to 1 in the
+received status report. The function read_bulk_callback of the pegasus
+driver treats such received "packets" (in the terminology of the
+hardware) as errors but the field simply does just indicate that the
+Ethernet frame (MAC destination to FCS) is longer than 1518 octets.
+
+It seems that in the 1990s there was a distinction between
+"giant" (> 1518) and "runt" (< 64) frames and the hardware includes
+flags to indicate this distinction. It seems that the purpose of the
+distinction "giant" frames was to not allow infinitely long frames due
+to transmission errors and to allow hardware to have an upper limit of
+the frame size. However, the hardware already has such limit with its
+2048 octet receive buffer and, therefore, Long_pkt is merely a
+convention and should not be treated as a receive error.
+
+Actually, the hardware is even able to receive Ethernet frames with 2048
+octets which exceeds the claimed limit frame size limit of the driver of
+1536 octets (PEGASUS_MTU).
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Matthias-Christian Ott <ott@mirix.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/pegasus.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
+index 6a92a3fef75e5..cd063f45785b7 100644
+--- a/drivers/net/usb/pegasus.c
++++ b/drivers/net/usb/pegasus.c
+@@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb)
+               goto goon;
+       rx_status = buf[count - 2];
+-      if (rx_status & 0x1e) {
++      if (rx_status & 0x1c) {
+               netif_dbg(pegasus, rx_err, net,
+                         "RX packet error %x\n", rx_status);
+               net->stats.rx_errors++;
+-              if (rx_status & 0x06)   /* long or runt */
++              if (rx_status & 0x04)   /* runt */
+                       net->stats.rx_length_errors++;
+               if (rx_status & 0x08)
+                       net->stats.rx_crc_errors++;
+-- 
+2.34.1
+
diff --git a/queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch b/queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch
new file mode 100644 (file)
index 0000000..23ada3f
--- /dev/null
@@ -0,0 +1,101 @@
+From 1fd82e11c90e79735e09f7f6d2b43f94cf1fb08f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 12:48:11 +0000
+Subject: NFC: st21nfca: Fix memory leak in device probe and remove
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+[ Upstream commit 1b9dadba502234eea7244879b8d5d126bfaf9f0c ]
+
+'phy->pending_skb' is alloced when device probe, but forgot to free
+in the error handling path and remove path, this cause memory leak
+as follows:
+
+unreferenced object 0xffff88800bc06800 (size 512):
+  comm "8", pid 11775, jiffies 4295159829 (age 9.032s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450
+    [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0
+    [<000000005fea522c>] __alloc_skb+0x124/0x380
+    [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2
+
+Fix it by freeing 'pending_skb' in error and remove.
+
+Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
+index 279d88128b2e4..d56bc24709b5c 100644
+--- a/drivers/nfc/st21nfca/i2c.c
++++ b/drivers/nfc/st21nfca/i2c.c
+@@ -528,7 +528,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+       phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+       if (IS_ERR(phy->gpiod_ena)) {
+               nfc_err(dev, "Unable to get ENABLE GPIO\n");
+-              return PTR_ERR(phy->gpiod_ena);
++              r = PTR_ERR(phy->gpiod_ena);
++              goto out_free;
+       }
+       phy->se_status.is_ese_present =
+@@ -539,7 +540,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+       r = st21nfca_hci_platform_init(phy);
+       if (r < 0) {
+               nfc_err(&client->dev, "Unable to reboot st21nfca\n");
+-              return r;
++              goto out_free;
+       }
+       r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+@@ -548,15 +549,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+                               ST21NFCA_HCI_DRIVER_NAME, phy);
+       if (r < 0) {
+               nfc_err(&client->dev, "Unable to register IRQ handler\n");
+-              return r;
++              goto out_free;
+       }
+-      return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+-                                      ST21NFCA_FRAME_HEADROOM,
+-                                      ST21NFCA_FRAME_TAILROOM,
+-                                      ST21NFCA_HCI_LLC_MAX_PAYLOAD,
+-                                      &phy->hdev,
+-                                      &phy->se_status);
++      r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
++                             ST21NFCA_FRAME_HEADROOM,
++                             ST21NFCA_FRAME_TAILROOM,
++                             ST21NFCA_HCI_LLC_MAX_PAYLOAD,
++                             &phy->hdev,
++                             &phy->se_status);
++      if (r)
++              goto out_free;
++
++      return 0;
++
++out_free:
++      kfree_skb(phy->pending_skb);
++      return r;
+ }
+ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+@@ -567,6 +576,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+       if (phy->powered)
+               st21nfca_hci_i2c_disable(phy);
++      if (phy->pending_skb)
++              kfree_skb(phy->pending_skb);
+       return 0;
+ }
+-- 
+2.34.1
+
diff --git a/queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch b/queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch
new file mode 100644 (file)
index 0000000..f831b50
--- /dev/null
@@ -0,0 +1,40 @@
+From 822bc7568b9406dd083542000b8ccf0bbd7f0b66 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Dec 2021 10:05:27 +0300
+Subject: scsi: lpfc: Terminate string in lpfc_debugfs_nvmeio_trc_write()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 9020be114a47bf7ff33e179b3bb0016b91a098e6 ]
+
+The "mybuf" string comes from the user, so we need to ensure that it is NUL
+terminated.
+
+Link: https://lore.kernel.org/r/20211214070527.GA27934@kili
+Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support")
+Reviewed-by: James Smart <jsmart2021@gmail.com>
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/lpfc/lpfc_debugfs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
+index bd6d459afce54..08b2e85dcd7d8 100644
+--- a/drivers/scsi/lpfc/lpfc_debugfs.c
++++ b/drivers/scsi/lpfc/lpfc_debugfs.c
+@@ -2954,8 +2954,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf,
+       char mybuf[64];
+       char *pbuf;
+-      if (nbytes > 64)
+-              nbytes = 64;
++      if (nbytes > 63)
++              nbytes = 63;
+       memset(mybuf, 0, sizeof(mybuf));
+-- 
+2.34.1
+
diff --git a/queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch b/queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch
new file mode 100644 (file)
index 0000000..d9845e2
--- /dev/null
@@ -0,0 +1,277 @@
+From 49d75b97300b0d80b0e3b05dc4b2ea4fa86fb965 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Dec 2021 13:04:30 -0500
+Subject: sctp: use call_rcu to free endpoint
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 5ec7d18d1813a5bead0b495045606c93873aecbb ]
+
+This patch is to delay the endpoint free by calling call_rcu() to fix
+another use-after-free issue in sctp_sock_dump():
+
+  BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20
+  Call Trace:
+    __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218
+    lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844
+    __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
+    _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168
+    spin_lock_bh include/linux/spinlock.h:334 [inline]
+    __lock_sock+0x203/0x350 net/core/sock.c:2253
+    lock_sock_nested+0xfe/0x120 net/core/sock.c:2774
+    lock_sock include/net/sock.h:1492 [inline]
+    sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324
+    sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091
+    sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527
+    __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049
+    inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065
+    netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244
+    __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352
+    netlink_dump_start include/linux/netlink.h:216 [inline]
+    inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170
+    __sock_diag_cmd net/core/sock_diag.c:232 [inline]
+    sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263
+    netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477
+    sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274
+
+This issue occurs when asoc is peeled off and the old sk is freed after
+getting it by asoc->base.sk and before calling lock_sock(sk).
+
+To prevent the sk free, as a holder of the sk, ep should be alive when
+calling lock_sock(). This patch uses call_rcu() and moves sock_put and
+ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to
+hold the ep under rcu_read_lock in sctp_transport_traverse_process().
+
+If sctp_endpoint_hold() returns true, it means this ep is still alive
+and we have held it and can continue to dump it; If it returns false,
+it means this ep is dead and can be freed after rcu_read_unlock, and
+we should skip it.
+
+In sctp_sock_dump(), after locking the sk, if this ep is different from
+tsp->asoc->ep, it means during this dumping, this asoc was peeled off
+before calling lock_sock(), and the sk should be skipped; If this ep is
+the same with tsp->asoc->ep, it means no peeloff happens on this asoc,
+and due to lock_sock, no peeloff will happen either until release_sock.
+
+Note that delaying endpoint free won't delay the port release, as the
+port release happens in sctp_endpoint_destroy() before calling call_rcu().
+Also, freeing endpoint by call_rcu() makes it safe to access the sk by
+asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv().
+
+Thanks Jones to bring this issue up.
+
+v1->v2:
+  - improve the changelog.
+  - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed.
+
+Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com
+Reported-by: Lee Jones <lee.jones@linaro.org>
+Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sctp/sctp.h    |  6 +++---
+ include/net/sctp/structs.h |  3 ++-
+ net/sctp/diag.c            | 12 ++++++------
+ net/sctp/endpointola.c     | 23 +++++++++++++++--------
+ net/sctp/socket.c          | 23 +++++++++++++++--------
+ 5 files changed, 41 insertions(+), 26 deletions(-)
+
+diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
+index 189fdb9db1622..d314a180ab93d 100644
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated;
+ int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
+ struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
++typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
+ void sctp_transport_walk_start(struct rhashtable_iter *iter);
+ void sctp_transport_walk_stop(struct rhashtable_iter *iter);
+ struct sctp_transport *sctp_transport_get_next(struct net *net,
+@@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
+                                 struct net *net,
+                                 const union sctp_addr *laddr,
+                                 const union sctp_addr *paddr, void *p);
+-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
+-                          int (*cb_done)(struct sctp_transport *, void *),
+-                          struct net *net, int *pos, void *p);
++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
++                                  struct net *net, int *pos, void *p);
+ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
+ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
+                      struct sctp_info *info);
+diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
+index 651bba654d77d..8d2c3dd9f5953 100644
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -1365,6 +1365,7 @@ struct sctp_endpoint {
+       u32 secid;
+       u32 peer_secid;
++      struct rcu_head rcu;
+ };
+ /* Recover the outter endpoint structure. */
+@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
+ struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
+ void sctp_endpoint_free(struct sctp_endpoint *);
+ void sctp_endpoint_put(struct sctp_endpoint *);
+-void sctp_endpoint_hold(struct sctp_endpoint *);
++int sctp_endpoint_hold(struct sctp_endpoint *ep);
+ void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
+ struct sctp_association *sctp_endpoint_lookup_assoc(
+       const struct sctp_endpoint *ep,
+diff --git a/net/sctp/diag.c b/net/sctp/diag.c
+index 760b367644c12..a7d6231715013 100644
+--- a/net/sctp/diag.c
++++ b/net/sctp/diag.c
+@@ -290,9 +290,8 @@ out:
+       return err;
+ }
+-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
++static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
+ {
+-      struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       struct sk_buff *skb = commp->skb;
+@@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+       int err = 0;
+       lock_sock(sk);
++      if (ep != tsp->asoc->ep)
++              goto release;
+       list_for_each_entry(assoc, &ep->asocs, asocs) {
+               if (cb->args[4] < cb->args[1])
+                       goto next;
+@@ -344,9 +345,8 @@ release:
+       return err;
+ }
+-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
++static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
+ {
+-      struct sctp_endpoint *ep = tsp->asoc->ep;
+       struct sctp_comm_param *commp = p;
+       struct sock *sk = ep->base.sk;
+       const struct inet_diag_req_v2 *r = commp->r;
+@@ -505,8 +505,8 @@ skip:
+       if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
+               goto done;
+-      sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+-                              net, &pos, &commp);
++      sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
++                                      net, &pos, &commp);
+       cb->args[2] = pos;
+ done:
+diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
+index 48c9c2c7602f7..efffde7f2328e 100644
+--- a/net/sctp/endpointola.c
++++ b/net/sctp/endpointola.c
+@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
+ }
+ /* Final destructor for endpoint.  */
++static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
++{
++      struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
++      struct sock *sk = ep->base.sk;
++
++      sctp_sk(sk)->ep = NULL;
++      sock_put(sk);
++
++      kfree(ep);
++      SCTP_DBG_OBJCNT_DEC(ep);
++}
++
+ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
+ {
+       struct sock *sk;
+@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
+       if (sctp_sk(sk)->bind_hash)
+               sctp_put_port(sk);
+-      sctp_sk(sk)->ep = NULL;
+-      /* Give up our hold on the sock */
+-      sock_put(sk);
+-
+-      kfree(ep);
+-      SCTP_DBG_OBJCNT_DEC(ep);
++      call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
+ }
+ /* Hold a reference to an endpoint. */
+-void sctp_endpoint_hold(struct sctp_endpoint *ep)
++int sctp_endpoint_hold(struct sctp_endpoint *ep)
+ {
+-      refcount_inc(&ep->base.refcnt);
++      return refcount_inc_not_zero(&ep->base.refcnt);
+ }
+ /* Release a reference to an endpoint and clean up if there are
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index 6b937bfd47515..d2215d24634e8 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
+ }
+ EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
+-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
+-                          int (*cb_done)(struct sctp_transport *, void *),
+-                          struct net *net, int *pos, void *p) {
++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
++                                  struct net *net, int *pos, void *p)
++{
+       struct rhashtable_iter hti;
+       struct sctp_transport *tsp;
++      struct sctp_endpoint *ep;
+       int ret;
+ again:
+@@ -5351,26 +5352,32 @@ again:
+       tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
+       for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
+-              ret = cb(tsp, p);
+-              if (ret)
+-                      break;
++              ep = tsp->asoc->ep;
++              if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
++                      ret = cb(ep, tsp, p);
++                      if (ret)
++                              break;
++                      sctp_endpoint_put(ep);
++              }
+               (*pos)++;
+               sctp_transport_put(tsp);
+       }
+       sctp_transport_walk_stop(&hti);
+       if (ret) {
+-              if (cb_done && !cb_done(tsp, p)) {
++              if (cb_done && !cb_done(ep, tsp, p)) {
+                       (*pos)++;
++                      sctp_endpoint_put(ep);
+                       sctp_transport_put(tsp);
+                       goto again;
+               }
++              sctp_endpoint_put(ep);
+               sctp_transport_put(tsp);
+       }
+       return ret;
+ }
+-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
++EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
+ /* 7.2.1 Association Status (SCTP_STATUS)
+-- 
+2.34.1
+
diff --git a/queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch b/queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch
new file mode 100644 (file)
index 0000000..f63edcc
--- /dev/null
@@ -0,0 +1,70 @@
+From ef2ac44b8f3185802b7a3941e5e9c1a6111f6fa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Dec 2021 22:24:41 +0000
+Subject: selftests: Calculate udpgso segment count without header adjustment
+
+From: Coco Li <lixiaoyan@google.com>
+
+[ Upstream commit 5471d5226c3b39b3d2f7011c082d5715795bd65c ]
+
+The below referenced commit correctly updated the computation of number
+of segments (gso_size) by using only the gso payload size and
+removing the header lengths.
+
+With this change the regression test started failing. Update
+the tests to match this new behavior.
+
+Both IPv4 and IPv6 tests are updated, as a separate patch in this series
+will update udp_v6_send_skb to match this change in udp_send_skb.
+
+Fixes: 158390e45612 ("udp: using datalen to cap max gso segments")
+Signed-off-by: Coco Li <lixiaoyan@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/udpgso.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
+index c66da6ffd6d8d..7badaf215de28 100644
+--- a/tools/testing/selftests/net/udpgso.c
++++ b/tools/testing/selftests/net/udpgso.c
+@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
+       },
+       {
+               /* send max number of min sized segments */
+-              .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
++              .tlen = UDP_MAX_SEGMENTS,
+               .gso_len = 1,
+-              .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
++              .r_num_mss = UDP_MAX_SEGMENTS,
+       },
+       {
+               /* send max number + 1 of min sized segments: fail */
+-              .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
++              .tlen = UDP_MAX_SEGMENTS + 1,
+               .gso_len = 1,
+               .tfail = true,
+       },
+@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
+       },
+       {
+               /* send max number of min sized segments */
+-              .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
++              .tlen = UDP_MAX_SEGMENTS,
+               .gso_len = 1,
+-              .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
++              .r_num_mss = UDP_MAX_SEGMENTS,
+       },
+       {
+               /* send max number + 1 of min sized segments: fail */
+-              .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
++              .tlen = UDP_MAX_SEGMENTS + 1,
+               .gso_len = 1,
+               .tfail = true,
+       },
+-- 
+2.34.1
+
diff --git a/queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch b/queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch
new file mode 100644 (file)
index 0000000..c31b492
--- /dev/null
@@ -0,0 +1,36 @@
+From e7ad5b2e0bdbde171280e4e45c73c3b9303e6888 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Dec 2021 15:27:30 +0800
+Subject: selftests: net: Fix a typo in udpgro_fwd.sh
+
+From: Jianguo Wu <wujianguo@chinatelecom.cn>
+
+[ Upstream commit add25d6d6c85f7b6d00a055ee0a4169acf845681 ]
+
+$rvs -> $rcv
+
+Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests")
+Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
+Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/udpgro_fwd.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
+index 7f26591f236b9..6a3985b8cd7f6 100755
+--- a/tools/testing/selftests/net/udpgro_fwd.sh
++++ b/tools/testing/selftests/net/udpgro_fwd.sh
+@@ -132,7 +132,7 @@ run_test() {
+       local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+                                                         sed -e 's/\[//' -e 's/:.*//'`
+       if [ $rcv != $pkts ]; then
+-              echo " fail - received $rvs packets, expected $pkts"
++              echo " fail - received $rcv packets, expected $pkts"
+               ret=1
+               return
+       fi
+-- 
+2.34.1
+
diff --git a/queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch b/queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch
new file mode 100644 (file)
index 0000000..8c234d8
--- /dev/null
@@ -0,0 +1,63 @@
+From f15cb0802fd81ce5167c38990348abadd17fd45c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Dec 2021 18:58:10 +0800
+Subject: selftests/net: udpgso_bench_tx: fix dst ip argument
+
+From: wujianguo <wujianguo@chinatelecom.cn>
+
+[ Upstream commit 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 ]
+
+udpgso_bench_tx call setup_sockaddr() for dest address before
+parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}",
+then ${dst_port} will be ignored, and using default cfg_port 8000.
+
+This will cause test case "multiple GRO socks" failed in udpgro.sh.
+
+Setup sockaddr after parsing all arguments.
+
+Fixes: 3a687bef148d ("selftests: udp gso benchmark")
+Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
+index 17512a43885e7..f1fdaa2702913 100644
+--- a/tools/testing/selftests/net/udpgso_bench_tx.c
++++ b/tools/testing/selftests/net/udpgso_bench_tx.c
+@@ -419,6 +419,7 @@ static void usage(const char *filepath)
+ static void parse_opts(int argc, char **argv)
+ {
++      const char *bind_addr = NULL;
+       int max_len, hdrlen;
+       int c;
+@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv)
+                       cfg_cpu = strtol(optarg, NULL, 0);
+                       break;
+               case 'D':
+-                      setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
++                      bind_addr = optarg;
+                       break;
+               case 'l':
+                       cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv)
+               }
+       }
++      if (!bind_addr)
++              bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
++
++      setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
++
+       if (optind != argc)
+               usage(argv[0]);
+-- 
+2.34.1
+
diff --git a/queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch b/queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch
new file mode 100644 (file)
index 0000000..b22e46e
--- /dev/null
@@ -0,0 +1,54 @@
+From 207cab2684cdb4af6c1387774f09a3effefd66b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Dec 2021 18:40:29 +0800
+Subject: selftests: net: using ping6 for IPv6 in udpgro_fwd.sh
+
+From: Jianguo Wu <wujianguo@chinatelecom.cn>
+
+[ Upstream commit 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa ]
+
+udpgro_fwd.sh output following message:
+  ping: 2001:db8:1::100: Address family for hostname not supported
+
+Using ping6 when pinging IPv6 addresses.
+
+Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests")
+Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/udpgro_fwd.sh | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
+index 6a3985b8cd7f6..3ea73013d9568 100755
+--- a/tools/testing/selftests/net/udpgro_fwd.sh
++++ b/tools/testing/selftests/net/udpgro_fwd.sh
+@@ -185,6 +185,7 @@ for family in 4 6; do
+       IPT=iptables
+       SUFFIX=24
+       VXDEV=vxlan
++      PING=ping
+       if [ $family = 6 ]; then
+               BM_NET=$BM_NET_V6
+@@ -192,6 +193,7 @@ for family in 4 6; do
+               SUFFIX="64 nodad"
+               VXDEV=vxlan6
+               IPT=ip6tables
++              PING="ping6"
+       fi
+       echo "IPv$family"
+@@ -237,7 +239,7 @@ for family in 4 6; do
+       # load arp cache before running the test to reduce the amount of
+       # stray traffic on top of the UDP tunnel
+-      ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
++      ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
+       run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+       cleanup
+-- 
+2.34.1
+
index 28aaa51e725355e37b7d499448cfb730b933f675..74e4307a8eeabe922e2984b7b0b04278dd485025 100644 (file)
@@ -13,3 +13,38 @@ recordmcount.pl-fix-typo-in-s390-mcount-regex.patch
 powerpc-ptdump-fix-debug_wx-since-generic-ptdump-conversion.patch
 efi-move-efifb_setup_from_dmi-prototype-from-arch-headers.patch
 selinux-initialize-proto-variable-in-selinux_ip_postroute_compat.patch
+scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch
+net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch
+net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch
+net-mlx5-fix-sf-health-recovery-flow.patch
+net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch
+net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch
+net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch
+net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch
+net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch
+net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch
+udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch
+selftests-calculate-udpgso-segment-count-without-hea.patch
+net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch
+sctp-use-call_rcu-to-free-endpoint.patch
+net-smc-fix-using-of-uninitialized-completions.patch
+net-usb-pegasus-do-not-drop-long-ethernet-frames.patch
+net-ag71xx-fix-a-potential-double-free-in-error-hand.patch
+net-lantiq_xrx200-fix-statistics-of-received-bytes.patch
+nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch
+net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch
+net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch
+igc-do-not-enable-crosstimestamping-for-i225-v-model.patch
+igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch
+drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch
+drm-amd-display-set-optimize_pwr_state-for-dcn31.patch
+ionic-initialize-the-lif-dbid_inuse-bitmap.patch
+net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch
+net-bridge-mcast-add-and-enforce-query-interval-mini.patch
+net-bridge-mcast-add-and-enforce-startup-query-inter.patch
+selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch
+selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch
+net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch
+net-ncsi-check-for-error-return-from-call-to-nla_put.patch
+selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch
+fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch
diff --git a/queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch b/queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch
new file mode 100644 (file)
index 0000000..5c7a7c9
--- /dev/null
@@ -0,0 +1,44 @@
+From 1587073fcd199f7fa580a8b8f1e4b5175ba690bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Dec 2021 22:24:40 +0000
+Subject: udp: using datalen to cap ipv6 udp max gso segments
+
+From: Coco Li <lixiaoyan@google.com>
+
+[ Upstream commit 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 ]
+
+The max number of UDP gso segments is intended to cap to
+UDP_MAX_SEGMENTS, this is checked in udp_send_skb().
+
+skb->len contains network and transport header len here, we should use
+only data len instead.
+
+This is the ipv6 counterpart to the below referenced commit,
+which missed the ipv6 change
+
+Fixes: 158390e45612 ("udp: using datalen to cap max gso segments")
+Signed-off-by: Coco Li <lixiaoyan@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 7bee95d8d2df0..8cd8c0bce0986 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
+-              if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++              if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
+-- 
+2.34.1
+