From: Sasha Levin Date: Sun, 2 Jan 2022 21:57:04 +0000 (-0500) Subject: Fixes for 5.15 X-Git-Tag: v4.4.298~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b0e8cac8d79d3eb6ea72a28eb4a7b7f912b9eb83;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch b/queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch new file mode 100644 index 00000000000..0c274f39486 --- /dev/null +++ b/queue-5.15/drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch @@ -0,0 +1,45 @@ +From 0e39974b65cf94a71260ccf06b0ebabd1dd77272 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Dec 2021 13:53:36 -0500 +Subject: drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization + +From: Nicholas Kazlauskas + +[ Upstream commit a07f8b9983543d465b50870ab4f845d4d710ed3f ] + +[Why] +Otherwise SMU won't mark Display as idle when trying to perform s2idle. + +[How] +Mark the bit in the dcn31 codepath, doesn't apply to older ASIC. + +It needed to be split from phy refclk off to prevent entering s2idle +when PSR was engaged but driver was not ready. 
+ +Fixes: 118a33151658 ("drm/amd/display: Add DCN3.1 clock manager support") + +Tested-by: Daniel Wheeler +Reviewed-by: Eric Yang +Acked-by: Rodrigo Siqueira +Signed-off-by: Nicholas Kazlauskas +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +index 377c4e53a2b37..407e19412a949 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +@@ -157,6 +157,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, + union display_idle_optimization_u idle_info = { 0 }; + idle_info.idle_info.df_request_disabled = 1; + idle_info.idle_info.phy_ref_clk_off = 1; ++ idle_info.idle_info.s0i2_rdy = 1; + dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); + /* update power state */ + clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; +-- +2.34.1 + diff --git a/queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch b/queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch new file mode 100644 index 00000000000..e18ac64a927 --- /dev/null +++ b/queue-5.15/drm-amd-display-set-optimize_pwr_state-for-dcn31.patch @@ -0,0 +1,47 @@ +From b82599716deab4c90ff6873216c3e073f05f827a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Dec 2021 16:05:36 -0500 +Subject: drm/amd/display: Set optimize_pwr_state for DCN31 + +From: Nicholas Kazlauskas + +[ Upstream commit 33735c1c8d0223170d79dbe166976d9cd7339c7a ] + +[Why] +We'll exit optimized power state to do link detection but we won't enter +back into the optimized power state. + +This could potentially block s2idle entry depending on the sequencing, +but it also means we're losing some power during the transition period. 
+ +[How] +Hook up the handler like DCN21. It was also missed like the +exit_optimized_pwr_state callback. + +Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") + +Tested-by: Daniel Wheeler +Reviewed-by: Eric Yang +Acked-by: Rodrigo Siqueira +Signed-off-by: Nicholas Kazlauskas +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +index ac8fb202fd5ee..4e9fe090b770a 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +@@ -100,6 +100,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { + .z10_save_init = dcn31_z10_save_init, + .is_abm_supported = dcn31_is_abm_supported, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, ++ .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn20_update_visual_confirm_color, + }; +-- +2.34.1 + diff --git a/queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch b/queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch new file mode 100644 index 00000000000..76a4c6456d1 --- /dev/null +++ b/queue-5.15/fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch @@ -0,0 +1,82 @@ +From ffa66093d553c6b3f173dc76f2d4b89035d526f3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Dec 2021 12:26:27 +0000 +Subject: fsl/fman: Fix missing put_device() call in fman_port_probe + +From: Miaoqian Lin + +[ Upstream commit bf2b09fedc17248b315f80fb249087b7d28a69a6 ] + +The reference taken by 'of_find_device_by_node()' must be released when +not needed anymore. +Add the corresponding 'put_device()' in the and error handling paths. 
+ +Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support") +Signed-off-by: Miaoqian Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c +index d9baac0dbc7d0..4c9d05c45c033 100644 +--- a/drivers/net/ethernet/freescale/fman/fman_port.c ++++ b/drivers/net/ethernet/freescale/fman/fman_port.c +@@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) + fman = dev_get_drvdata(&fm_pdev->dev); + if (!fman) { + err = -EINVAL; +- goto return_err; ++ goto put_device; + } + + err = of_property_read_u32(port_node, "cell-index", &val); +@@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) + dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", + __func__, port_node); + err = -EINVAL; +- goto return_err; ++ goto put_device; + } + port_id = (u8)val; + port->dts_params.id = port_id; +@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) + } else { + dev_err(port->dev, "%s: Illegal port type\n", __func__); + err = -EINVAL; +- goto return_err; ++ goto put_device; + } + + port->dts_params.type = port_type; +@@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) + dev_err(port->dev, "%s: incorrect qman-channel-id\n", + __func__); + err = -EINVAL; +- goto return_err; ++ goto put_device; + } + port->dts_params.qman_channel_id = qman_channel_id; + } +@@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) + dev_err(port->dev, "%s: of_address_to_resource() failed\n", + __func__); + err = -ENOMEM; +- goto return_err; ++ goto put_device; + } + + port->dts_params.fman = fman; +@@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) + + return 0; + ++put_device: ++ 
put_device(&fm_pdev->dev); + return_err: + of_node_put(port_node); + free_port: +-- +2.34.1 + diff --git a/queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch b/queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch new file mode 100644 index 00000000000..90893efeea7 --- /dev/null +++ b/queue-5.15/igc-do-not-enable-crosstimestamping-for-i225-v-model.patch @@ -0,0 +1,56 @@ +From 20ec2803317bb1fe0fd98e87d3accdfb82d14a3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Dec 2021 16:39:49 -0800 +Subject: igc: Do not enable crosstimestamping for i225-V models + +From: Vinicius Costa Gomes + +[ Upstream commit 1e81dcc1ab7de7a789e60042ce82d5a612632599 ] + +It was reported that when PCIe PTM is enabled, some lockups could +be observed with some integrated i225-V models. + +While the issue is investigated, we can disable crosstimestamp for +those models and see no loss of functionality, because those models +don't have any support for time synchronization. + +Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") +Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/ +Reported-by: Stefan Dietrich +Signed-off-by: Vinicius Costa Gomes +Tested-by: Nechama Kraus +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c +index 30568e3544cda..4f9245aa79a18 100644 +--- a/drivers/net/ethernet/intel/igc/igc_ptp.c ++++ b/drivers/net/ethernet/intel/igc/igc_ptp.c +@@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) + */ + static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) + { +- return IS_ENABLED(CONFIG_X86_TSC) ? 
pcie_ptm_enabled(adapter->pdev) : false; ++ if (!IS_ENABLED(CONFIG_X86_TSC)) ++ return false; ++ ++ /* FIXME: it was noticed that enabling support for PCIe PTM in ++ * some i225-V models could cause lockups when bringing the ++ * interface up/down. There should be no downsides to ++ * disabling crosstimestamping support for i225-V, as it ++ * doesn't have any PTP support. That way we gain some time ++ * while root causing the issue. ++ */ ++ if (adapter->pdev->device == IGC_DEV_ID_I225_V) ++ return false; ++ ++ return pcie_ptm_enabled(adapter->pdev); + } + + static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) +-- +2.34.1 + diff --git a/queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch b/queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch new file mode 100644 index 00000000000..877e9cb2942 --- /dev/null +++ b/queue-5.15/igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch @@ -0,0 +1,48 @@ +From 505fabb777c6ffdfd7ef2606ded3361a6f3505d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Dec 2021 16:49:33 -0700 +Subject: igc: Fix TX timestamp support for non-MSI-X platforms + +From: James McLaughlin + +[ Upstream commit f85846bbf43de38fb2c89fe7d2a085608c4eb25a ] + +Time synchronization was not properly enabled on non-MSI-X platforms. 
+ +Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") +Signed-off-by: James McLaughlin +Reviewed-by: Vinicius Costa Gomes +Tested-by: Nechama Kraus +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 0e19b4d02e628..0a96627391a8c 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -5466,6 +5466,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + ++ if (icr & IGC_ICR_TS) ++ igc_tsync_interrupt(adapter); ++ + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +@@ -5509,6 +5512,9 @@ static irqreturn_t igc_intr(int irq, void *data) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + ++ if (icr & IGC_ICR_TS) ++ igc_tsync_interrupt(adapter); ++ + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +-- +2.34.1 + diff --git a/queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch b/queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch new file mode 100644 index 00000000000..48a54c80a3c --- /dev/null +++ b/queue-5.15/ionic-initialize-the-lif-dbid_inuse-bitmap.patch @@ -0,0 +1,40 @@ +From 0ada5a1c5013662da138c2ad161989ef0703a083 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Dec 2021 15:06:17 +0100 +Subject: ionic: Initialize the 'lif->dbid_inuse' bitmap + +From: Christophe JAILLET + +[ Upstream commit 140c7bc7d1195750342ea0e6ab76179499ae7cd7 ] + +When allocated, this bitmap is not initialized. Only the first bit is set a +few lines below. + +Use bitmap_zalloc() to make sure that it is cleared before being used. 
+ +Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells") +Signed-off-by: Christophe JAILLET +Signed-off-by: Shannon Nelson +Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +index 7f3322ce044c7..6ac507ddf09af 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -3283,7 +3283,7 @@ int ionic_lif_init(struct ionic_lif *lif) + return -EINVAL; + } + +- lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); ++ lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); + if (!lif->dbid_inuse) { + dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); + return -ENOMEM; +-- +2.34.1 + diff --git a/queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch b/queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch new file mode 100644 index 00000000000..bae119acab5 --- /dev/null +++ b/queue-5.15/net-ag71xx-fix-a-potential-double-free-in-error-hand.patch @@ -0,0 +1,98 @@ +From 0378ead13b77522d1402aa2ebfdd3e9574dc2cf1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Dec 2021 18:51:44 +0100 +Subject: net: ag71xx: Fix a potential double free in error handling paths + +From: Christophe JAILLET + +[ Upstream commit 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd ] + +'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there +is no need to call free_netdev() explicitly or there will be a double +free(). + +Simplify all error handling paths accordingly. + +Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") +Signed-off-by: Christophe JAILLET +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++--------------- + 1 file changed, 8 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c +index 02ae98aabf91c..416a5c99db5a2 100644 +--- a/drivers/net/ethernet/atheros/ag71xx.c ++++ b/drivers/net/ethernet/atheros/ag71xx.c +@@ -1915,15 +1915,12 @@ static int ag71xx_probe(struct platform_device *pdev) + ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); + if (IS_ERR(ag->mac_reset)) { + netif_err(ag, probe, ndev, "missing mac reset\n"); +- err = PTR_ERR(ag->mac_reset); +- goto err_free; ++ return PTR_ERR(ag->mac_reset); + } + + ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); +- if (!ag->mac_base) { +- err = -ENOMEM; +- goto err_free; +- } ++ if (!ag->mac_base) ++ return -ENOMEM; + + ndev->irq = platform_get_irq(pdev, 0); + err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, +@@ -1931,7 +1928,7 @@ static int ag71xx_probe(struct platform_device *pdev) + if (err) { + netif_err(ag, probe, ndev, "unable to request IRQ %d\n", + ndev->irq); +- goto err_free; ++ return err; + } + + ndev->netdev_ops = &ag71xx_netdev_ops; +@@ -1959,10 +1956,8 @@ static int ag71xx_probe(struct platform_device *pdev) + ag->stop_desc = dmam_alloc_coherent(&pdev->dev, + sizeof(struct ag71xx_desc), + &ag->stop_desc_dma, GFP_KERNEL); +- if (!ag->stop_desc) { +- err = -ENOMEM; +- goto err_free; +- } ++ if (!ag->stop_desc) ++ return -ENOMEM; + + ag->stop_desc->data = 0; + ag->stop_desc->ctrl = 0; +@@ -1977,7 +1972,7 @@ static int ag71xx_probe(struct platform_device *pdev) + err = of_get_phy_mode(np, &ag->phy_if_mode); + if (err) { + netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); +- goto err_free; ++ return err; + } + + netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); +@@ -1985,7 +1980,7 @@ static int ag71xx_probe(struct platform_device *pdev) + err = 
clk_prepare_enable(ag->clk_eth); + if (err) { + netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); +- goto err_free; ++ return err; + } + + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); +@@ -2021,8 +2016,6 @@ err_mdio_remove: + ag71xx_mdio_remove(ag); + err_put_clk: + clk_disable_unprepare(ag->clk_eth); +-err_free: +- free_netdev(ndev); + return err; + } + +-- +2.34.1 + diff --git a/queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch b/queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch new file mode 100644 index 00000000000..416487ed482 --- /dev/null +++ b/queue-5.15/net-bridge-mcast-add-and-enforce-query-interval-mini.patch @@ -0,0 +1,122 @@ +From 7909cb874988801170bd4382d2b5e9cdcbfbf4ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Dec 2021 19:21:15 +0200 +Subject: net: bridge: mcast: add and enforce query interval minimum + +From: Nikolay Aleksandrov + +[ Upstream commit 99b40610956a8a8755653a67392e2a8b772453be ] + +As reported[1] if query interval is set too low and we have multiple +bridges or even a single bridge with multiple querier vlans configured +we can crash the machine. Add a 1 second minimum which must be enforced +by overwriting the value if set lower (i.e. without returning an error) to +avoid breaking user-space. If that happens a log message is emitted to let +the administrator know that the interval has been set to the minimum. +The issue has been present since these intervals could be user-controlled. 
+ +[1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ + +Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") +Reported-by: Eric Dumazet +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_multicast.c | 16 ++++++++++++++++ + net/bridge/br_netlink.c | 2 +- + net/bridge/br_private.h | 3 +++ + net/bridge/br_sysfs_br.c | 2 +- + net/bridge/br_vlan_options.c | 2 +- + 5 files changed, 22 insertions(+), 3 deletions(-) + +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index f3d751105343c..998da4a2d2092 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, + } + #endif + ++void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, ++ unsigned long val) ++{ ++ unsigned long intvl_jiffies = clock_t_to_jiffies(val); ++ ++ if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { ++ br_info(brmctx->br, ++ "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", ++ jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), ++ jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); ++ intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; ++ } ++ ++ brmctx->multicast_query_interval = intvl_jiffies; ++} ++ + /** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses +diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c +index 5c6c4305ed235..09812df3bc91d 100644 +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], + if (data[IFLA_BR_MCAST_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); + +- br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_query_intvl(&br->multicast_ctx, val); + } + 
+ if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index fd5e7e74573ce..30c9411bfb646 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -28,6 +28,7 @@ + #define BR_MAX_PORTS (1<multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_query_intvl(&br->multicast_ctx, val); + return 0; + } + +diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c +index 8ffd4ed2563c6..bf1ac08742794 100644 +--- a/net/bridge/br_vlan_options.c ++++ b/net/bridge/br_vlan_options.c +@@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); +- v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_query_intvl(&v->br_mcast_ctx, val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { +-- +2.34.1 + diff --git a/queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch b/queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch new file mode 100644 index 00000000000..477e39a4344 --- /dev/null +++ b/queue-5.15/net-bridge-mcast-add-and-enforce-startup-query-inter.patch @@ -0,0 +1,125 @@ +From 8789201d2750332a0b4b18aa7a8fd2e2459b3de6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Dec 2021 19:21:16 +0200 +Subject: net: bridge: mcast: add and enforce startup query interval minimum + +From: Nikolay Aleksandrov + +[ Upstream commit f83a112bd91a494cdee671aec74e777470fb4a07 ] + +As reported[1] if startup query interval is set too low in combination with +large number of startup queries and we have multiple bridges or even a +single bridge with multiple querier vlans configured we can crash the +machine. Add a 1 second minimum which must be enforced by overwriting the +value if set lower (i.e. without returning an error) to avoid breaking +user-space. 
If that happens a log message is emitted to let the admin know +that the startup interval has been set to the minimum. It doesn't make +sense to make the startup interval lower than the normal query interval +so use the same value of 1 second. The issue has been present since these +intervals could be user-controlled. + +[1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ + +Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") +Reported-by: Eric Dumazet +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_multicast.c | 16 ++++++++++++++++ + net/bridge/br_netlink.c | 2 +- + net/bridge/br_private.h | 3 +++ + net/bridge/br_sysfs_br.c | 2 +- + net/bridge/br_vlan_options.c | 2 +- + 5 files changed, 22 insertions(+), 3 deletions(-) + +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index 998da4a2d2092..de24098894897 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + brmctx->multicast_query_interval = intvl_jiffies; + } + ++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, ++ unsigned long val) ++{ ++ unsigned long intvl_jiffies = clock_t_to_jiffies(val); ++ ++ if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { ++ br_info(brmctx->br, ++ "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", ++ jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), ++ jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); ++ intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; ++ } ++ ++ brmctx->multicast_startup_query_interval = intvl_jiffies; ++} ++ + /** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses +diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c +index 
09812df3bc91d..e365cf82f0615 100644 +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], + if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); + +- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); + } + + if (data[IFLA_BR_MCAST_STATS_ENABLED]) { +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 30c9411bfb646..5951e3142fe94 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -29,6 +29,7 @@ + + #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 + #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) ++#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN + + #define BR_HWDOM_MAX BITS_PER_LONG + +@@ -971,6 +972,8 @@ size_t br_multicast_querier_state_size(void); + size_t br_rports_size(const struct net_bridge_mcast *brmctx); + void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val); ++void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, ++ unsigned long val); + + static inline bool br_group_is_l2(const struct br_ip *group) + { +diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c +index f5bd1114a434d..7b0c19772111c 100644 +--- a/net/bridge/br_sysfs_br.c ++++ b/net/bridge/br_sysfs_br.c +@@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show( + static int set_startup_query_interval(struct net_bridge *br, unsigned long val, + struct netlink_ext_ack *extack) + { +- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); + return 0; + } + +diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c +index bf1ac08742794..a6382973b3e70 100644 +--- a/net/bridge/br_vlan_options.c 
++++ b/net/bridge/br_vlan_options.c +@@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, + u64 val; + + val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); +- v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); ++ br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val); + *changed = true; + } + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { +-- +2.34.1 + diff --git a/queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch b/queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch new file mode 100644 index 00000000000..e9ab61dabe2 --- /dev/null +++ b/queue-5.15/net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch @@ -0,0 +1,43 @@ +From ff9646f81b11c79f524685fab51204ca0f2ec1fb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Dec 2021 17:31:42 +0200 +Subject: net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper + +From: Nikolay Aleksandrov + +[ Upstream commit 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f ] + +We need to first check if the context is a vlan one, then we need to +check the global bridge multicast vlan snooping flag, and finally the +vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast +processing (e.g. querier timers). 
+ +Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control") +Signed-off-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_private.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 5951e3142fe94..bd218c2b2cd97 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -1158,9 +1158,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) + static inline bool + br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) + { +- return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && +- br_multicast_ctx_is_vlan(brmctx) && +- !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); ++ return br_multicast_ctx_is_vlan(brmctx) && ++ (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || ++ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); + } + + static inline bool +-- +2.34.1 + diff --git a/queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch b/queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch new file mode 100644 index 00000000000..f92b7e16c83 --- /dev/null +++ b/queue-5.15/net-lantiq_xrx200-fix-statistics-of-received-bytes.patch @@ -0,0 +1,36 @@ +From e1ccda59b6c9b7cfa873b5a48df06eafb26257ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Dec 2021 17:22:03 +0100 +Subject: net: lantiq_xrx200: fix statistics of received bytes + +From: Aleksander Jan Bajkowski + +[ Upstream commit 5be60a945329d82f06fc755a43eeefbfc5f77d72 ] + +Received frames have FCS truncated. There is no need +to subtract FCS length from the statistics. + +Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") +Signed-off-by: Aleksander Jan Bajkowski +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/lantiq_xrx200.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c +index fb78f17d734fe..b02f796b5422f 100644 +--- a/drivers/net/ethernet/lantiq_xrx200.c ++++ b/drivers/net/ethernet/lantiq_xrx200.c +@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) + skb->protocol = eth_type_trans(skb, net_dev); + netif_receive_skb(skb); + net_dev->stats.rx_packets++; +- net_dev->stats.rx_bytes += len - ETH_FCS_LEN; ++ net_dev->stats.rx_bytes += len; + + return 0; + } +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch b/queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch new file mode 100644 index 00000000000..1996f16a97e --- /dev/null +++ b/queue-5.15/net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch @@ -0,0 +1,47 @@ +From 6a805c457e7dc9698bf878c313367b9e811d14fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 Dec 2021 06:54:53 +0000 +Subject: net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources + +From: Miaoqian Lin + +[ Upstream commit 6b8b42585886c59a008015083282aae434349094 ] + +The mlx5_get_uars_page() function returns error pointers. +Using IS_ERR() to check the return value to fix this. 
+ +Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality") +Signed-off-by: Miaoqian Lin +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +index 0fe159809ba15..ea1b8ca5bf3aa 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +@@ -2,6 +2,7 @@ + /* Copyright (c) 2019 Mellanox Technologies. */ + + #include <linux/mlx5/eswitch.h> ++#include <linux/err.h> + #include "dr_types.h" + + #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ +@@ -75,9 +76,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); +- if (!dmn->uar) { ++ if (IS_ERR(dmn->uar)) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); +- ret = -ENOMEM; ++ ret = PTR_ERR(dmn->uar); + goto clean_pd; + } + +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch b/queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch new file mode 100644 index 00000000000..f56df2fec8b --- /dev/null +++ b/queue-5.15/net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch @@ -0,0 +1,43 @@ +From 3aebafe5fba999948f7fe6a958d7a0bc5476c69d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Nov 2021 23:10:57 +0200 +Subject: net/mlx5: Fix error print in case of IRQ request failed + +From: Shay Drory + +[ Upstream commit aa968f922039706f6d13e8870b49e424d0a8d9ad ] + +In case IRQ layer failed to find or to request irq, the driver is +printing the first cpu of the provided affinity as part of the error +print. Empty affinity is a valid input for the IRQ layer, and it is +an error to call cpumask_first() on empty affinity. 
+ +Remove the first cpu print from the error message. + +Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +index 763c83a023809..11f3649fdaab1 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +@@ -346,8 +346,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, + new_irq = irq_pool_create_irq(pool, affinity); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { +- mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", +- cpumask_first(affinity)); ++ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", ++ PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch b/queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch new file mode 100644 index 00000000000..c36f92f4499 --- /dev/null +++ b/queue-5.15/net-mlx5-fix-sf-health-recovery-flow.patch @@ -0,0 +1,60 @@ +From 663b1c674df8f0094d7b23fbd8980d509f3da3a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Nov 2021 20:08:13 +0200 +Subject: net/mlx5: Fix SF health recovery flow + +From: Moshe Shemesh + +[ Upstream commit 33de865f7bce3968676e43b0182af0a2dd359dae ] + +SF do not directly control the PCI device. During recovery flow SF +should not be allowed to do pci disable or pci reset, its PF will do it. 
+ +It fixes the following kernel trace: +mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow +mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called +mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations +mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532 +mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow +mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called +mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations +mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532 +mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed, +status bad resource state(0x9), syndrome (0x658908) +mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed +mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed + +Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") +Signed-off-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 92b08fa07efae..92b01858d7f3e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1775,12 +1775,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) + + int mlx5_recover_device(struct mlx5_core_dev *dev) + { +- int ret = -EIO; ++ if (!mlx5_core_is_sf(dev)) { ++ mlx5_pci_disable_device(dev); ++ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) ++ return -EIO; ++ } + +- mlx5_pci_disable_device(dev); +- if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) +- ret = mlx5_load_one(dev); +- return ret; ++ return mlx5_load_one(dev); + } + + 
static struct pci_driver mlx5_core_driver = { +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch b/queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch new file mode 100644 index 00000000000..4159aaacaad --- /dev/null +++ b/queue-5.15/net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch @@ -0,0 +1,39 @@ +From 023f1b3301047804e171b5ff57102010f8e8ec9c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Dec 2021 03:52:53 +0200 +Subject: net/mlx5: Fix tc max supported prio for nic mode + +From: Chris Mi + +[ Upstream commit d671e109bd8548d067b27e39e183a484430bf102 ] + +Only prio 1 is supported if firmware doesn't support ignore flow +level for nic mode. The offending commit removed the check wrongly. +Add it back. + +Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") +Signed-off-by: Chris Mi +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +index 97e5845b4cfdd..d5e47630e2849 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +@@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) + + u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) + { ++ if (!mlx5_chains_prios_supported(chains)) ++ return 1; ++ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch b/queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch new file mode 100644 index 00000000000..a516d850539 --- /dev/null +++ 
b/queue-5.15/net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch @@ -0,0 +1,60 @@ +From c222de48a8bac56cbad50b9d9baf44208f3e6bb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Dec 2021 11:18:02 +0800 +Subject: net/mlx5e: Delete forward rule for ct or sample action + +From: Chris Mi + +[ Upstream commit 2820110d945923ab2f4901753e4ccbb2a506fa8e ] + +When there is ct or sample action, the ct or sample rule will be deleted +and return. But if there is an extra mirror action, the forward rule can't +be deleted because of the return. + +Fix it by removing the return. + +Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring") +Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") +Signed-off-by: Chris Mi +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +index e7736421d1bc2..fa461bc57baee 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1179,21 +1179,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + goto offload_rule_0; + +- if (flow_flag_test(flow, CT)) { +- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); +- return; +- } +- +- if (flow_flag_test(flow, SAMPLE)) { +- mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); +- return; +- } +- + if (attr->esw_attr->split_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + ++ if (flow_flag_test(flow, CT)) ++ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); ++ else if (flow_flag_test(flow, SAMPLE)) ++ mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); ++ else + offload_rule_0: +- 
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); ++ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + } + + struct mlx5_flow_handle * +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch b/queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch new file mode 100644 index 00000000000..44538f159a6 --- /dev/null +++ b/queue-5.15/net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch @@ -0,0 +1,126 @@ +From 90287a4b22d88a87325567122dd796214cb82386 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 Jul 2020 16:32:44 +0300 +Subject: net/mlx5e: Fix ICOSQ recovery flow for XSK + +From: Maxim Mikityanskiy + +[ Upstream commit 19c4aba2d4e23997061fb11aed8a3e41334bfa14 ] + +There are two ICOSQs per channel: one is needed for RX, and the other +for async operations (XSK TX, kTLS offload). Currently, the recovery +flow for both is the same, and async ICOSQ is mistakenly treated like +the regular ICOSQ. + +This patch prevents running the regular ICOSQ recovery on async ICOSQ. +The purpose of async ICOSQ is to handle XSK wakeup requests and post +kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs, +so the regular recovery sequence is not applicable here. 
+ +Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") +Signed-off-by: Maxim Mikityanskiy +Reviewed-by: Aya Levin +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 -- + .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++++----- + 2 files changed, 22 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h +index 54757117071db..7204bc86e4741 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -956,9 +956,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); + void mlx5e_destroy_rq(struct mlx5e_rq *rq); + + struct mlx5e_sq_param; +-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, +- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +-void mlx5e_close_icosq(struct mlx5e_icosq *sq); + int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, + struct mlx5e_xdpsq *sq, bool is_redirect); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 611c8a0cbf4f0..685fe77de5ce4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -1036,9 +1036,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) + mlx5e_reporter_icosq_cqe_err(sq); + } + ++static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) ++{ ++ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, ++ recover_work); ++ ++ /* Not implemented yet. 
*/ ++ ++ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); ++} ++ + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, +- struct mlx5e_icosq *sq) ++ struct mlx5e_icosq *sq, ++ work_func_t recover_work_func) + { + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; +@@ -1059,7 +1070,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + if (err) + goto err_sq_wq_destroy; + +- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); ++ INIT_WORK(&sq->recover_work, recover_work_func); + + return 0; + +@@ -1397,13 +1408,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) + mlx5e_reporter_tx_err_cqe(sq); + } + +-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, +- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) ++static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, ++ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, ++ work_func_t recover_work_func) + { + struct mlx5e_create_sq_param csp = {}; + int err; + +- err = mlx5e_alloc_icosq(c, param, sq); ++ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); + if (err) + return err; + +@@ -1442,7 +1454,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) + synchronize_net(); /* Sync with NAPI. 
*/ + } + +-void mlx5e_close_icosq(struct mlx5e_icosq *sq) ++static void mlx5e_close_icosq(struct mlx5e_icosq *sq) + { + struct mlx5e_channel *c = sq->channel; + +@@ -1869,13 +1881,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, + + spin_lock_init(&c->async_icosq_lock); + +- err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); ++ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, ++ mlx5e_async_icosq_err_cqe_work); + if (err) + goto err_close_xdpsq_cq; + + mutex_init(&c->icosq_recovery_lock); + +- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); ++ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, ++ mlx5e_icosq_err_cqe_work); + if (err) + goto err_close_async_icosq; + +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch b/queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch new file mode 100644 index 00000000000..a2714a70586 --- /dev/null +++ b/queue-5.15/net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch @@ -0,0 +1,219 @@ +From 84708c3fb67cacc68517e97e053cfe59c4848699 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Oct 2021 19:40:09 +0300 +Subject: net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow + +From: Maxim Mikityanskiy + +[ Upstream commit 17958d7cd731b977ae7d4af38d891c3a1235b5f1 ] + +Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing +recovery for the ICOSQ, don't forget to deactivate XSKRQ. + +XSK can be opened and closed while channels are active, so a new mutex +prevents the ICOSQ recovery from running at the same time. The ICOSQ +recovery deactivates and reactivates XSKRQ, so any parallel change in +XSK state would break consistency. As the regular RQ is running, it's +not enough to just flush the recovery work, because it can be +rescheduled. 
+ +Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") +Signed-off-by: Maxim Mikityanskiy +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ + .../ethernet/mellanox/mlx5/core/en/health.h | 2 ++ + .../mellanox/mlx5/core/en/reporter_rx.c | 35 ++++++++++++++++++- + .../mellanox/mlx5/core/en/xsk/setup.c | 16 ++++++++- + .../net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++-- + 5 files changed, 58 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h +index c10a107a3ea53..54757117071db 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -727,6 +727,8 @@ struct mlx5e_channel { + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + int ix; + int cpu; ++ /* Sync between icosq recovery and XSK enable/disable. */ ++ struct mutex icosq_recovery_lock; + }; + + struct mlx5e_ptp; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +index 018262d0164b3..3aaf3c2752feb 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); + void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); + void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); + void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); ++void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); ++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); + + #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 + #define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000 +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +index 0eb125316fe20..e329158fdc555 100644 +--- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +@@ -59,6 +59,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) + + static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) + { ++ struct mlx5e_rq *xskrq = NULL; + struct mlx5_core_dev *mdev; + struct mlx5e_icosq *icosq; + struct net_device *dev; +@@ -67,7 +68,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) + int err; + + icosq = ctx; ++ ++ mutex_lock(&icosq->channel->icosq_recovery_lock); ++ ++ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ + rq = &icosq->channel->rq; ++ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) ++ xskrq = &icosq->channel->xskrq; + mdev = icosq->channel->mdev; + dev = icosq->channel->netdev; + err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); +@@ -81,6 +88,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) + goto out; + + mlx5e_deactivate_rq(rq); ++ if (xskrq) ++ mlx5e_deactivate_rq(xskrq); ++ + err = mlx5e_wait_for_icosq_flush(icosq); + if (err) + goto out; +@@ -94,15 +104,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) + goto out; + + mlx5e_reset_icosq_cc_pc(icosq); ++ + mlx5e_free_rx_in_progress_descs(rq); ++ if (xskrq) ++ mlx5e_free_rx_in_progress_descs(xskrq); ++ + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mlx5e_activate_icosq(icosq); +- mlx5e_activate_rq(rq); + ++ mlx5e_activate_rq(rq); + rq->stats->recover++; ++ ++ if (xskrq) { ++ mlx5e_activate_rq(xskrq); ++ xskrq->stats->recover++; ++ } ++ ++ mutex_unlock(&icosq->channel->icosq_recovery_lock); ++ + return 0; + out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); ++ mutex_unlock(&icosq->channel->icosq_recovery_lock); + return err; + } + +@@ -703,6 +726,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); + } + ++void 
mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) ++{ ++ mutex_lock(&c->icosq_recovery_lock); ++} ++ ++void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) ++{ ++ mutex_unlock(&c->icosq_recovery_lock); ++} ++ + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { + .name = "rx", + .recover = mlx5e_rx_reporter_recover, +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +index 538bc2419bd83..8526a5fbbf0bf 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +@@ -4,6 +4,7 @@ + #include "setup.h" + #include "en/params.h" + #include "en/txrx.h" ++#include "en/health.h" + + /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may + * change unexpectedly, and mlx5e has a minimum valid stride size for striding +@@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) + + void mlx5e_activate_xsk(struct mlx5e_channel *c) + { ++ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid ++ * activating XSKRQ in the middle of recovery. ++ */ ++ mlx5e_reporter_icosq_suspend_recovery(c); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); ++ mlx5e_reporter_icosq_resume_recovery(c); ++ + /* TX queue is created active. */ + + spin_lock_bh(&c->async_icosq_lock); +@@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) + + void mlx5e_deactivate_xsk(struct mlx5e_channel *c) + { +- mlx5e_deactivate_rq(&c->xskrq); ++ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the ++ * middle of recovery. Suspend the recovery to avoid it. ++ */ ++ mlx5e_reporter_icosq_suspend_recovery(c); ++ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); ++ mlx5e_reporter_icosq_resume_recovery(c); ++ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ ++ + /* TX queue is disabled on close. 
*/ + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 8cf5fbebd674b..611c8a0cbf4f0 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -911,8 +911,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) + void mlx5e_close_rq(struct mlx5e_rq *rq) + { + cancel_work_sync(&rq->dim.work); +- if (rq->icosq) +- cancel_work_sync(&rq->icosq->recover_work); + cancel_work_sync(&rq->recover_work); + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); +@@ -1875,6 +1873,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, + if (err) + goto err_close_xdpsq_cq; + ++ mutex_init(&c->icosq_recovery_lock); ++ + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + if (err) + goto err_close_async_icosq; +@@ -1943,9 +1943,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) + mlx5e_close_xdpsq(&c->xdpsq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); ++ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. 
*/ ++ cancel_work_sync(&c->icosq.recover_work); + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); ++ mutex_destroy(&c->icosq_recovery_lock); + mlx5e_close_icosq(&c->async_icosq); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch b/queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch new file mode 100644 index 00000000000..eddd20400f2 --- /dev/null +++ b/queue-5.15/net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch @@ -0,0 +1,86 @@ +From 28d18fe129915b757906f23fc095ab72b1903856 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Nov 2021 11:08:41 +0200 +Subject: net/mlx5e: Fix wrong features assignment in case of error + +From: Gal Pressman + +[ Upstream commit 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 ] + +In case of an error in mlx5e_set_features(), 'netdev->features' must be +updated with the correct state of the device to indicate which features +were updated successfully. +To do that we maintain a copy of 'netdev->features' and update it after +successful feature changes, so we can assign it back to +'netdev->features' if needed. + +However, since not all netdev features are handled by the driver (e.g. +GRO/TSO/etc), some features may not be updated correctly in case of an +error updating another feature. + +For example, while requesting to disable TSO (feature which is not +handled by the driver) and enable HW-GRO, if an error occurs during +HW-GRO enable, 'oper_features' will be assigned with 'netdev->features' +and HW-GRO turned off. TSO will remain enabled in such case, which is a +bug. + +To solve that, instead of using 'netdev->features' as the baseline of +'oper_features' and changing it on set feature success, use 'features' +instead and update it in case of errors. 
+ +Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow") +Signed-off-by: Gal Pressman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 685fe77de5ce4..baa0d7d48fc0c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3450,12 +3450,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) + + static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t *features, +- netdev_features_t wanted_features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) + { +- netdev_features_t changes = wanted_features ^ netdev->features; +- bool enable = !!(wanted_features & feature); ++ netdev_features_t changes = *features ^ netdev->features; ++ bool enable = !!(*features & feature); + int err; + + if (!(changes & feature)) +@@ -3463,22 +3462,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, + + err = feature_handler(netdev, enable); + if (err) { ++ MLX5E_SET_FEATURE(features, feature, !enable); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? 
"Enable" : "Disable", &feature, err); + return err; + } + +- MLX5E_SET_FEATURE(features, feature, enable); + return 0; + } + + int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) + { +- netdev_features_t oper_features = netdev->features; ++ netdev_features_t oper_features = features; + int err = 0; + + #define MLX5E_HANDLE_FEATURE(feature, handler) \ +- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) ++ mlx5e_handle_feature(netdev, &oper_features, feature, handler) + + err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch b/queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch new file mode 100644 index 00000000000..bfe4e90e4e2 --- /dev/null +++ b/queue-5.15/net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch @@ -0,0 +1,157 @@ +From e35babfd02149aabcf23a23de8b0e1a88219fd42 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Aug 2021 13:33:17 +0300 +Subject: net/mlx5e: Use tc sample stubs instead of ifdefs in source file + +From: Roi Dayan + +[ Upstream commit f3e02e479debb37777696c9f984f75152beeb56d ] + +Instead of having sparse ifdefs in source files use a single +ifdef in the tc sample header file and use stubs. 
+ +Signed-off-by: Roi Dayan +Reviewed-by: Maor Dickman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 -- + .../mellanox/mlx5/core/en/tc/sample.h | 27 +++++++++++++++++++ + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 12 --------- + 3 files changed, 27 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +index de03684528bbf..8451940c16ab9 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +@@ -647,9 +647,7 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk + "Failed to restore tunnel info for sampled packet\n"); + return; + } +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5e_tc_sample_skb(skb, mapped_obj); +-#endif /* CONFIG_MLX5_TC_SAMPLE */ + mlx5_rep_tc_post_napi_receive(tc_priv); + } + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +index db0146df9b303..9ef8a49d78014 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +@@ -19,6 +19,8 @@ struct mlx5e_sample_attr { + struct mlx5e_sample_flow *sample_flow; + }; + ++#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) ++ + void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + + struct mlx5_flow_handle * +@@ -38,4 +40,29 @@ mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act); + void + mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); + ++#else /* CONFIG_MLX5_TC_SAMPLE */ ++ ++static inline struct mlx5_flow_handle * ++mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, ++ struct mlx5_flow_spec *spec, ++ struct mlx5_flow_attr *attr, ++ u32 tunnel_id) ++{ return ERR_PTR(-EOPNOTSUPP); } ++ ++static inline void 
++mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, ++ struct mlx5_flow_handle *rule, ++ struct mlx5_flow_attr *attr) {} ++ ++static inline struct mlx5e_tc_psample * ++mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) ++{ return ERR_PTR(-EOPNOTSUPP); } ++ ++static inline void ++mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {} ++ ++static inline void ++mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {} ++ ++#endif /* CONFIG_MLX5_TC_SAMPLE */ + #endif /* __MLX5_EN_TC_SAMPLE_H__ */ +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +index e3b320b6d85b9..e7736421d1bc2 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -248,7 +248,6 @@ get_ct_priv(struct mlx5e_priv *priv) + return priv->fs.tc.ct; + } + +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + static struct mlx5e_tc_psample * + get_sample_priv(struct mlx5e_priv *priv) + { +@@ -265,7 +264,6 @@ get_sample_priv(struct mlx5e_priv *priv) + + return NULL; + } +-#endif + + struct mlx5_flow_handle * + mlx5_tc_rule_insert(struct mlx5e_priv *priv, +@@ -1148,11 +1146,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + flow, spec, attr, + mod_hdr_acts); +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + } else if (flow_flag_test(flow, SAMPLE)) { + rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, + mlx5e_tc_get_flow_tun_id(flow)); +-#endif + } else { + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + } +@@ -1188,12 +1184,10 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + return; + } + +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + if (flow_flag_test(flow, SAMPLE)) { + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + return; + } +-#endif + + if (attr->esw_attr->split_count) + 
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); +@@ -5014,9 +5008,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); +-#endif + + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + +@@ -5060,9 +5052,7 @@ err_ht_init: + err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); + err_tun_mapping: +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); +-#endif + mlx5_tc_ct_clean(uplink_priv->ct_priv); + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); +@@ -5082,9 +5072,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + +-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); +-#endif + mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); + } +-- +2.34.1 + diff --git a/queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch b/queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch new file mode 100644 index 00000000000..f6c3f09b0be --- /dev/null +++ b/queue-5.15/net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch @@ -0,0 +1,92 @@ +From e09777ed4c1e36dadf6cd8ea95a2f4f1e4612f3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Nov 2021 16:05:44 +0200 +Subject: net/mlx5e: Wrap the tx reporter dump callback to extract the sq + +From: Amir Tzin + +[ Upstream commit 918fc3855a6507a200e9cf22c20be852c0982687 ] + +Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct +mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually +of type struct mlx5e_tx_timeout_ctx *. 
+ + mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected + mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000 + BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae) + kernel stack overflow (page fault): 0000 [#1] SMP NOPTI + CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] + RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 + [mlx5_core] + Call Trace: + mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core] + devlink_health_do_dump.part.91+0x71/0xd0 + devlink_health_report+0x157/0x1b0 + mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core] + ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0 + [mlx5_core] + ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core] + ? update_load_avg+0x19b/0x550 + ? set_next_entity+0x72/0x80 + ? pick_next_task_fair+0x227/0x340 + ? finish_task_switch+0xa2/0x280 + mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core] + process_one_work+0x1de/0x3a0 + worker_thread+0x2d/0x3c0 + ? process_one_work+0x3a0/0x3a0 + kthread+0x115/0x130 + ? kthread_park+0x90/0x90 + ret_from_fork+0x1f/0x30 + --[ end trace 51ccabea504edaff ]--- + RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 + PKRU: 55555554 + Kernel panic - not syncing: Fatal exception + Kernel Offset: disabled + end Kernel panic - not syncing: Fatal exception + +To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which +extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the +TX-timeout-recovery flow dump callback. 
+ +Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") +Signed-off-by: Aya Levin +Signed-off-by: Amir Tzin +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +index bb682fd751c98..8024599994642 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +@@ -463,6 +463,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); + } + ++static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, ++ void *ctx) ++{ ++ struct mlx5e_tx_timeout_ctx *to_ctx = ctx; ++ ++ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); ++} ++ + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) + { +@@ -558,7 +566,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) + to_ctx.sq = sq; + err_ctx.ctx = &to_ctx; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; +- err_ctx.dump = mlx5e_tx_reporter_dump_sq; ++ err_ctx.dump = mlx5e_tx_reporter_timeout_dump; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, +-- +2.34.1 + diff --git a/queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch b/queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch new file mode 100644 index 00000000000..947a7cd20d7 --- /dev/null +++ b/queue-5.15/net-ncsi-check-for-error-return-from-call-to-nla_put.patch @@ -0,0 +1,45 @@ +From 02683fd674fedc945cc702a20935fabbb4240e2a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: 
Wed, 29 Dec 2021 11:21:18 +0800 +Subject: net/ncsi: check for error return from call to nla_put_u32 + +From: Jiasheng Jiang + +[ Upstream commit 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 ] + +As we can see from the comment of the nla_put() that it could return +-EMSGSIZE if the tailroom of the skb is insufficient. +Therefore, it should be better to check the return value of the +nla_put_u32 and return the error code if error occurs. +Also, there are many other functions have the same problem, and if this +patch is correct, I will commit a new version to fix all. + +Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") +Signed-off-by: Jiasheng Jiang +Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ncsi/ncsi-netlink.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c +index bb5f1650f11cb..c189b4c8a1823 100644 +--- a/net/ncsi/ncsi-netlink.c ++++ b/net/ncsi/ncsi-netlink.c +@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, + pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); + if (!pnest) + return -ENOMEM; +- nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); ++ rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); ++ if (rc) { ++ nla_nest_cancel(skb, pnest); ++ return rc; ++ } + if ((0x1 << np->id) == ndp->package_whitelist) + nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); + cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); +-- +2.34.1 + diff --git a/queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch b/queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch new file mode 100644 index 00000000000..0dab7ec97d9 --- /dev/null +++ b/queue-5.15/net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch @@ -0,0 +1,40 @@ +From 818c9aa9da0404a2c39402486f8369462b84d392 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Dec 2021 02:14:59 
+0000 +Subject: net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in + __fixed_phy_register + +From: Miaoqian Lin + +[ Upstream commit b45396afa4177f2b1ddfeff7185da733fade1dc3 ] + +The fixed_phy_get_gpiod() function returns NULL, it doesn't return error +pointers, using NULL checking to fix this. + +Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()") +Signed-off-by: Miaoqian Lin +Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/fixed_phy.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c +index c65fb5f5d2dc5..a0c256bd54417 100644 +--- a/drivers/net/phy/fixed_phy.c ++++ b/drivers/net/phy/fixed_phy.c +@@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, + /* Check if we have a GPIO associated with this fixed phy */ + if (!gpiod) { + gpiod = fixed_phy_get_gpiod(np); +- if (IS_ERR(gpiod)) +- return ERR_CAST(gpiod); ++ if (!gpiod) ++ return ERR_PTR(-EINVAL); + } + + /* Get the next available PHY address, up to PHY_MAX_ADDR */ +-- +2.34.1 + diff --git a/queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch b/queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch new file mode 100644 index 00000000000..a3d5fc00d8e --- /dev/null +++ b/queue-5.15/net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch @@ -0,0 +1,120 @@ +From 56ab6da51734dd0dfa422124402931c737c42741 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Dec 2021 17:03:24 +0800 +Subject: net/smc: don't send CDC/LLC message if link not ready + +From: Dust Li + +[ Upstream commit 90cee52f2e780345d3629e278291aea5ac74f40f ] + +We found smc_llc_send_link_delete_all() sometimes wait +for 2s timeout when testing with RDMA link up/down.
+It is possible when a smc_link is in ACTIVATING state, +the underlaying QP is still in RESET or RTR state, which +cannot send any messages out. + +smc_llc_send_link_delete_all() use smc_link_usable() to +checks whether the link is usable, if the QP is still in +RESET or RTR state, but the smc_link is in ACTIVATING, this +LLC message will always fail without any CQE entering the +CQ, and we will always wait 2s before timeout. + +Since we cannot send any messages through the QP before +the QP enter RTS. I add a wrapper smc_link_sendable() +which checks the state of QP along with the link state. +And replace smc_link_usable() with smc_link_sendable() +in all LLC & CDC message sending routine. + +Fixes: 5f08318f617b ("smc: connection data control (CDC)") +Signed-off-by: Dust Li +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc_core.c | 2 +- + net/smc/smc_core.h | 6 ++++++ + net/smc/smc_llc.c | 2 +- + net/smc/smc_wr.c | 4 ++-- + net/smc/smc_wr.h | 2 +- + 5 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index 5a9c22ee75fa4..cb06568cf422f 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -604,7 +604,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + +- if (smc_link_usable(lnk)) ++ if (smc_link_sendable(lnk)) + lnk->state = SMC_LNK_INACTIVE; + } + wake_up_all(&lgr->llc_msg_waiter); +diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h +index c043ecdca5c44..51a3e8248ade2 100644 +--- a/net/smc/smc_core.h ++++ b/net/smc/smc_core.h +@@ -366,6 +366,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) + return true; + } + ++static inline bool smc_link_sendable(struct smc_link *lnk) ++{ ++ return smc_link_usable(lnk) && ++ lnk->qp_attr.cur_qp_state == IB_QPS_RTS; ++} ++ + static inline bool smc_link_active(struct smc_link *lnk) + { + return lnk->state == 
SMC_LNK_ACTIVE; +diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c +index f1d323439a2af..ee1f0fdba0855 100644 +--- a/net/smc/smc_llc.c ++++ b/net/smc/smc_llc.c +@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) + delllc.reason = htonl(rsn); + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { +- if (!smc_link_usable(&lgr->lnk[i])) ++ if (!smc_link_sendable(&lgr->lnk[i])) + continue; + if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) + break; +diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c +index c9cd7a4c5acfc..fcc1942001760 100644 +--- a/net/smc/smc_wr.c ++++ b/net/smc/smc_wr.c +@@ -169,7 +169,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) + static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) + { + *idx = link->wr_tx_cnt; +- if (!smc_link_usable(link)) ++ if (!smc_link_sendable(link)) + return -ENOLINK; + for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { + if (!test_and_set_bit(*idx, link->wr_tx_mask)) +@@ -212,7 +212,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, + } else { + rc = wait_event_interruptible_timeout( + link->wr_tx_wait, +- !smc_link_usable(link) || ++ !smc_link_sendable(link) || + lgr->terminating || + (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), + SMC_WR_TX_WAIT_FREE_SLOT_TIME); +diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h +index 2bc626f230a56..102d515757ee2 100644 +--- a/net/smc/smc_wr.h ++++ b/net/smc/smc_wr.h +@@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) + + static inline bool smc_wr_tx_link_hold(struct smc_link *link) + { +- if (!smc_link_usable(link)) ++ if (!smc_link_sendable(link)) + return false; + atomic_inc(&link->wr_tx_refcnt); + return true; +-- +2.34.1 + diff --git a/queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch b/queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch new file mode 100644 index 
00000000000..6b3365e02be --- /dev/null +++ b/queue-5.15/net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch @@ -0,0 +1,437 @@ +From 29c300dc7e5e909bdd0d944d1524dd615de05c67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Dec 2021 17:03:25 +0800 +Subject: net/smc: fix kernel panic caused by race of smc_sock + +From: Dust Li + +[ Upstream commit 349d43127dac00c15231e8ffbcaabd70f7b0e544 ] + +A crash occurs when smc_cdc_tx_handler() tries to access smc_sock +but smc_release() has already freed it. + +[ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88 +[ 4570.696048] #PF: supervisor write access in kernel mode +[ 4570.696728] #PF: error_code(0x0002) - not-present page +[ 4570.697401] PGD 0 P4D 0 +[ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI +[ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111 +[ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0 +[ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30 +<...> +[ 4570.711446] Call Trace: +[ 4570.711746] +[ 4570.711992] smc_cdc_tx_handler+0x41/0xc0 +[ 4570.712470] smc_wr_tx_tasklet_fn+0x213/0x560 +[ 4570.712981] ? smc_cdc_tx_dismisser+0x10/0x10 +[ 4570.713489] tasklet_action_common.isra.17+0x66/0x140 +[ 4570.714083] __do_softirq+0x123/0x2f4 +[ 4570.714521] irq_exit_rcu+0xc4/0xf0 +[ 4570.714934] common_interrupt+0xba/0xe0 + +Though smc_cdc_tx_handler() checked the existence of smc connection, +smc_release() may have already dismissed and released the smc socket +before smc_cdc_tx_handler() further visits it. 
+ +smc_cdc_tx_handler() |smc_release() +if (!conn) | + | + |smc_cdc_tx_dismiss_slots() + | smc_cdc_tx_dismisser() + | + |sock_put(&smc->sk) <- last sock_put, + | smc_sock freed +bh_lock_sock(&smc->sk) (panic) | + +To make sure we won't receive any CDC messages after we free the +smc_sock, add a refcount on the smc_connection for inflight CDC +message(posted to the QP but haven't received related CQE), and +don't release the smc_connection until all the inflight CDC messages +have been done, for both successful and failed ones. + +Using refcount on CDC messages brings another problem: when the link +is going to be destroyed, smcr_link_clear() will reset the QP, which +then removes all the pending CQEs related to the QP in the CQ. To make +sure all the CQEs will always come back so the refcount on the +smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced +by smc_ib_modify_qp_error(). +And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we +need to wait for all pending WQEs done, or we may encounter use-after- +free when handling CQEs. + +For IB device removal routine, we need to wait for all the QPs on that +device been destroyed before we can destroy CQs on the device, or +the refcount on smc_connection won't reach 0 and smc_sock cannot be +released. + +Fixes: 5f08318f617b ("smc: connection data control (CDC)") +Reported-by: Wen Gu +Signed-off-by: Dust Li +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc.h | 5 +++++ + net/smc/smc_cdc.c | 52 +++++++++++++++++++++------------------------- + net/smc/smc_cdc.h | 2 +- + net/smc/smc_core.c | 25 +++++++++++++++++----- + net/smc/smc_ib.c | 4 ++-- + net/smc/smc_ib.h | 1 + + net/smc/smc_wr.c | 41 +++--------------------------------- + net/smc/smc_wr.h | 3 +-- + 8 files changed, 57 insertions(+), 76 deletions(-) + +diff --git a/net/smc/smc.h b/net/smc/smc.h +index d65e15f0c944c..e6919fe31617b 100644 +--- a/net/smc/smc.h ++++ b/net/smc/smc.h +@@ -170,6 +170,11 @@ struct smc_connection { + u16 tx_cdc_seq; /* sequence # for CDC send */ + u16 tx_cdc_seq_fin; /* sequence # - tx completed */ + spinlock_t send_lock; /* protect wr_sends */ ++ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe ++ * - inc when post wqe, ++ * - dec on polled tx cqe ++ */ ++ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ + struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ + u32 tx_off; /* base offset in peer rmb */ + +diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c +index 99acd337ba90d..84c8a4374fddd 100644 +--- a/net/smc/smc_cdc.c ++++ b/net/smc/smc_cdc.c +@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, + struct smc_sock *smc; + int diff; + +- if (!conn) +- /* already dismissed */ +- return; +- + smc = container_of(conn, struct smc_sock, conn); + bh_lock_sock(&smc->sk); + if (!wc_status) { +@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, + conn); + conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; + } ++ ++ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && ++ unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) ++ wake_up(&conn->cdc_pend_tx_wq); ++ WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); ++ + smc_tx_sndbuf_nonfull(smc); + bh_unlock_sock(&smc->sk); + } +@@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, + conn->tx_cdc_seq++; + conn->local_tx_ctrl.seqno = 
conn->tx_cdc_seq; + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); ++ ++ atomic_inc(&conn->cdc_pend_tx_wr); ++ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ ++ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (!rc) { + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); +@@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, + } else { + conn->tx_cdc_seq--; + conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; ++ atomic_dec(&conn->cdc_pend_tx_wr); + } + + return rc; +@@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, + peer->token = htonl(local->token); + peer->prod_flags.failover_validation = 1; + ++ /* We need to set pend->conn here to make sure smc_cdc_tx_handler() ++ * can handle properly ++ */ ++ smc_cdc_add_pending_send(conn, pend); ++ ++ atomic_inc(&conn->cdc_pend_tx_wr); ++ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ ++ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); ++ if (unlikely(rc)) ++ atomic_dec(&conn->cdc_pend_tx_wr); ++ + return rc; + } + +@@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) + return rc; + } + +-static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, +- unsigned long data) ++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) + { +- struct smc_connection *conn = (struct smc_connection *)data; +- struct smc_cdc_tx_pend *cdc_pend = +- (struct smc_cdc_tx_pend *)tx_pend; +- +- return cdc_pend->conn == conn; +-} +- +-static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) +-{ +- struct smc_cdc_tx_pend *cdc_pend = +- (struct smc_cdc_tx_pend *)tx_pend; +- +- cdc_pend->conn = NULL; +-} +- +-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) +-{ +- struct smc_link *link = conn->lnk; +- +- smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, +- smc_cdc_tx_filter, smc_cdc_tx_dismisser, +- (unsigned long)conn); ++ 
wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); + } + + /* Send a SMC-D CDC header. +diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h +index 0a0a89abd38b2..696cc11f2303b 100644 +--- a/net/smc/smc_cdc.h ++++ b/net/smc/smc_cdc.h +@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, + struct smc_cdc_tx_pend **pend); +-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); ++void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); + int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, + struct smc_cdc_tx_pend *pend); + int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index cb06568cf422f..506b8498623b0 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -1056,7 +1056,7 @@ void smc_conn_free(struct smc_connection *conn) + smc_ism_unset_conn(conn); + tasklet_kill(&conn->rx_tsklet); + } else { +- smc_cdc_tx_dismiss_slots(conn); ++ smc_cdc_wait_pend_tx_wr(conn); + if (current_work() != &conn->abort_work) + cancel_work_sync(&conn->abort_work); + } +@@ -1133,7 +1133,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) + smc_llc_link_clear(lnk, log); + smcr_buf_unmap_lgr(lnk); + smcr_rtoken_clear_link(lnk); +- smc_ib_modify_qp_reset(lnk); ++ smc_ib_modify_qp_error(lnk); + smc_wr_free_link(lnk); + smc_ib_destroy_queue_pair(lnk); + smc_ib_dealloc_protection_domain(lnk); +@@ -1264,7 +1264,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) + else + tasklet_unlock_wait(&conn->rx_tsklet); + } else { +- smc_cdc_tx_dismiss_slots(conn); ++ smc_cdc_wait_pend_tx_wr(conn); + } + smc_lgr_unregister_conn(conn); + smc_close_active_abort(smc); +@@ -1387,11 +1387,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) + /* Called when an SMCR device is removed or the smc module is unloaded. 
+ * If smcibdev is given, all SMCR link groups using this device are terminated. + * If smcibdev is NULL, all SMCR link groups are terminated. ++ * ++ * We must wait here for QPs been destroyed before we destroy the CQs, ++ * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus ++ * smc_sock cannot be released. + */ + void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) + { + struct smc_link_group *lgr, *lg; + LIST_HEAD(lgr_free_list); ++ LIST_HEAD(lgr_linkdown_list); + int i; + + spin_lock_bh(&smc_lgr_list.lock); +@@ -1403,7 +1408,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) + list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) +- smcr_link_down_cond_sched(&lgr->lnk[i]); ++ list_move_tail(&lgr->list, &lgr_linkdown_list); + } + } + } +@@ -1415,6 +1420,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) + __smc_lgr_terminate(lgr, false); + } + ++ list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { ++ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { ++ if (lgr->lnk[i].smcibdev == smcibdev) { ++ mutex_lock(&lgr->llc_conf_mutex); ++ smcr_link_down_cond(&lgr->lnk[i]); ++ mutex_unlock(&lgr->llc_conf_mutex); ++ } ++ } ++ } ++ + if (smcibdev) { + if (atomic_read(&smcibdev->lnk_cnt)) + wait_event(smcibdev->lnks_deleted, +@@ -1514,7 +1529,6 @@ static void smcr_link_down(struct smc_link *lnk) + if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) + return; + +- smc_ib_modify_qp_reset(lnk); + to_lnk = smc_switch_conns(lgr, lnk, true); + if (!to_lnk) { /* no backup link available */ + smcr_link_clear(lnk, true); +@@ -1742,6 +1756,7 @@ create: + conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; + conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; + conn->urg_state = SMC_URG_READ; ++ init_waitqueue_head(&conn->cdc_pend_tx_wq); + INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); + if (ini->is_smcd) { + 
conn->rx_off = sizeof(struct smcd_cdc_msg); +diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c +index a8845343d183e..f0ec1f1d50fac 100644 +--- a/net/smc/smc_ib.c ++++ b/net/smc/smc_ib.c +@@ -101,12 +101,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) + IB_QP_MAX_QP_RD_ATOMIC); + } + +-int smc_ib_modify_qp_reset(struct smc_link *lnk) ++int smc_ib_modify_qp_error(struct smc_link *lnk) + { + struct ib_qp_attr qp_attr; + + memset(&qp_attr, 0, sizeof(qp_attr)); +- qp_attr.qp_state = IB_QPS_RESET; ++ qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); + } + +diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h +index 3085f5180da79..6967c3d52b03e 100644 +--- a/net/smc/smc_ib.h ++++ b/net/smc/smc_ib.h +@@ -79,6 +79,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); + int smc_ib_ready_link(struct smc_link *lnk); + int smc_ib_modify_qp_rts(struct smc_link *lnk); + int smc_ib_modify_qp_reset(struct smc_link *lnk); ++int smc_ib_modify_qp_error(struct smc_link *lnk); + long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); + int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot, u8 link_idx); +diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c +index fcc1942001760..59ca1a2d5c650 100644 +--- a/net/smc/smc_wr.c ++++ b/net/smc/smc_wr.c +@@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) + } + + /* wait till all pending tx work requests on the given link are completed */ +-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) ++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) + { +- if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), +- SMC_WR_TX_WAIT_PENDING_TIME)) +- return 0; +- else /* timeout */ +- return -EPIPE; ++ wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); + } + + static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) +@@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct 
ib_wc *wc) + struct smc_wr_tx_pend pnd_snd; + struct smc_link *link; + u32 pnd_snd_idx; +- int i; + + link = wc->qp->qp_context; + +@@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) + if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) + return; + if (wc->status) { +- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { +- /* clear full struct smc_wr_tx_pend including .priv */ +- memset(&link->wr_tx_pends[i], 0, +- sizeof(link->wr_tx_pends[i])); +- memset(&link->wr_tx_bufs[i], 0, +- sizeof(link->wr_tx_bufs[i])); +- clear_bit(i, link->wr_tx_mask); +- } + /* terminate link */ + smcr_link_down_cond_sched(link); + } +@@ -351,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) + return rc; + } + +-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, +- smc_wr_tx_filter filter, +- smc_wr_tx_dismisser dismisser, +- unsigned long data) +-{ +- struct smc_wr_tx_pend_priv *tx_pend; +- struct smc_wr_rx_hdr *wr_tx; +- int i; +- +- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { +- wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; +- if (wr_tx->type != wr_tx_hdr_type) +- continue; +- tx_pend = &link->wr_tx_pends[i].priv; +- if (filter(tx_pend, data)) +- dismisser(tx_pend); +- } +-} +- + /****************************** receive queue ********************************/ + + int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) +@@ -574,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk) + smc_wr_wakeup_reg_wait(lnk); + smc_wr_wakeup_tx_wait(lnk); + +- if (smc_wr_tx_wait_no_pending_sends(lnk)) +- memset(lnk->wr_tx_mask, 0, +- BITS_TO_LONGS(SMC_WR_BUF_CNT) * +- sizeof(*lnk->wr_tx_mask)); ++ smc_wr_tx_wait_no_pending_sends(lnk); + wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); + wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); + +diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h +index 102d515757ee2..cb58e60078f57 100644 +--- a/net/smc/smc_wr.h 
++++ b/net/smc/smc_wr.h +@@ -22,7 +22,6 @@ + #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ + + #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) +-#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) + + #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ + +@@ -122,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, + smc_wr_tx_filter filter, + smc_wr_tx_dismisser dismisser, + unsigned long data); +-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); ++void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); + + int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); + int smc_wr_rx_post_init(struct smc_link *link); +-- +2.34.1 + diff --git a/queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch b/queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch new file mode 100644 index 00000000000..024b4060145 --- /dev/null +++ b/queue-5.15/net-smc-fix-using-of-uninitialized-completions.patch @@ -0,0 +1,54 @@ +From b65035dd9568a56faab05b6267f5ca21aba336e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Dec 2021 14:35:30 +0100 +Subject: net/smc: fix using of uninitialized completions + +From: Karsten Graul + +[ Upstream commit 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 ] + +In smc_wr_tx_send_wait() the completion on index specified by +pend->idx is initialized and after smc_wr_tx_send() was called the wait +for completion starts. pend->idx is used to get the correct index for +the wait, but the pend structure could already be cleared in +smc_wr_tx_process_cqe(). +Introduce pnd_idx to hold and use a local copy of the correct index. + +Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message") +Signed-off-by: Karsten Graul +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc_wr.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c +index a419e9af36b98..c9cd7a4c5acfc 100644 +--- a/net/smc/smc_wr.c ++++ b/net/smc/smc_wr.c +@@ -288,18 +288,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + unsigned long timeout) + { + struct smc_wr_tx_pend *pend; ++ u32 pnd_idx; + int rc; + + pend = container_of(priv, struct smc_wr_tx_pend, priv); + pend->compl_requested = 1; +- init_completion(&link->wr_tx_compl[pend->idx]); ++ pnd_idx = pend->idx; ++ init_completion(&link->wr_tx_compl[pnd_idx]); + + rc = smc_wr_tx_send(link, priv); + if (rc) + return rc; + /* wait for completion by smc_wr_tx_process_cqe() */ + rc = wait_for_completion_interruptible_timeout( +- &link->wr_tx_compl[pend->idx], timeout); ++ &link->wr_tx_compl[pnd_idx], timeout); + if (rc <= 0) + rc = -ENODATA; + if (rc > 0) +-- +2.34.1 + diff --git a/queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch b/queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch new file mode 100644 index 00000000000..1cdb85fb36d --- /dev/null +++ b/queue-5.15/net-usb-pegasus-do-not-drop-long-ethernet-frames.patch @@ -0,0 +1,63 @@ +From 91eb5db8ed2352072ed153fbb42555333251d059 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Dec 2021 23:12:08 +0100 +Subject: net: usb: pegasus: Do not drop long Ethernet frames + +From: Matthias-Christian Ott + +[ Upstream commit ca506fca461b260ab32952b610c3d4aadc6c11fd ] + +The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames +that are longer than 1518 octets, for example, Ethernet frames that +contain 802.1Q VLAN tags. + +The frames are sent to the pegasus driver via USB but the driver +discards them because they have the Long_pkt field set to 1 in the +received status report. 
The function read_bulk_callback of the pegasus +driver treats such received "packets" (in the terminology of the +hardware) as errors but the field simply does just indicate that the +Ethernet frame (MAC destination to FCS) is longer than 1518 octets. + +It seems that in the 1990s there was a distinction between +"giant" (> 1518) and "runt" (< 64) frames and the hardware includes +flags to indicate this distinction. It seems that the purpose of the +distinction "giant" frames was to not allow infinitely long frames due +to transmission errors and to allow hardware to have an upper limit of +the frame size. However, the hardware already has such limit with its +2048 octet receive buffer and, therefore, Long_pkt is merely a +convention and should not be treated as a receive error. + +Actually, the hardware is even able to receive Ethernet frames with 2048 +octets which exceeds the claimed limit frame size limit of the driver of +1536 octets (PEGASUS_MTU). + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Matthias-Christian Ott +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/pegasus.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c +index 6a92a3fef75e5..cd063f45785b7 100644 +--- a/drivers/net/usb/pegasus.c ++++ b/drivers/net/usb/pegasus.c +@@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb) + goto goon; + + rx_status = buf[count - 2]; +- if (rx_status & 0x1e) { ++ if (rx_status & 0x1c) { + netif_dbg(pegasus, rx_err, net, + "RX packet error %x\n", rx_status); + net->stats.rx_errors++; +- if (rx_status & 0x06) /* long or runt */ ++ if (rx_status & 0x04) /* runt */ + net->stats.rx_length_errors++; + if (rx_status & 0x08) + net->stats.rx_crc_errors++; +-- +2.34.1 + diff --git a/queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch b/queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch new file mode 100644 index 00000000000..23ada3fcd4c --- /dev/null +++ b/queue-5.15/nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch @@ -0,0 +1,101 @@ +From 1fd82e11c90e79735e09f7f6d2b43f94cf1fb08f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Dec 2021 12:48:11 +0000 +Subject: NFC: st21nfca: Fix memory leak in device probe and remove + +From: Wei Yongjun + +[ Upstream commit 1b9dadba502234eea7244879b8d5d126bfaf9f0c ] + +'phy->pending_skb' is alloced when device probe, but forgot to free +in the error handling path and remove path, this cause memory leak +as follows: + +unreferenced object 0xffff88800bc06800 (size 512): + comm "8", pid 11775, jiffies 4295159829 (age 9.032s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450 + [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0 + [<000000005fea522c>] __alloc_skb+0x124/0x380 + [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2 + +Fix it by freeing 'pending_skb' in error and remove. + +Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip") +Reported-by: Hulk Robot +Signed-off-by: Wei Yongjun +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c +index 279d88128b2e4..d56bc24709b5c 100644 +--- a/drivers/nfc/st21nfca/i2c.c ++++ b/drivers/nfc/st21nfca/i2c.c +@@ -528,7 +528,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, + phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); + if (IS_ERR(phy->gpiod_ena)) { + nfc_err(dev, "Unable to get ENABLE GPIO\n"); +- return PTR_ERR(phy->gpiod_ena); ++ r = PTR_ERR(phy->gpiod_ena); ++ goto out_free; + } + + phy->se_status.is_ese_present = +@@ -539,7 +540,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, + r = st21nfca_hci_platform_init(phy); + if (r < 0) { + nfc_err(&client->dev, "Unable to reboot st21nfca\n"); +- return r; ++ goto out_free; + } + + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, +@@ -548,15 +549,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, + ST21NFCA_HCI_DRIVER_NAME, phy); + if (r < 0) { + nfc_err(&client->dev, "Unable to register IRQ handler\n"); +- return r; ++ goto out_free; + } + +- return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, +- ST21NFCA_FRAME_HEADROOM, +- ST21NFCA_FRAME_TAILROOM, +- ST21NFCA_HCI_LLC_MAX_PAYLOAD, +- &phy->hdev, +- &phy->se_status); ++ r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, ++ ST21NFCA_FRAME_HEADROOM, ++ ST21NFCA_FRAME_TAILROOM, ++ 
ST21NFCA_HCI_LLC_MAX_PAYLOAD, ++ &phy->hdev, ++ &phy->se_status); ++ if (r) ++ goto out_free; ++ ++ return 0; ++ ++out_free: ++ kfree_skb(phy->pending_skb); ++ return r; + } + + static int st21nfca_hci_i2c_remove(struct i2c_client *client) +@@ -567,6 +576,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) + + if (phy->powered) + st21nfca_hci_i2c_disable(phy); ++ if (phy->pending_skb) ++ kfree_skb(phy->pending_skb); + + return 0; + } +-- +2.34.1 + diff --git a/queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch b/queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch new file mode 100644 index 00000000000..f831b502341 --- /dev/null +++ b/queue-5.15/scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch @@ -0,0 +1,40 @@ +From 822bc7568b9406dd083542000b8ccf0bbd7f0b66 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Dec 2021 10:05:27 +0300 +Subject: scsi: lpfc: Terminate string in lpfc_debugfs_nvmeio_trc_write() + +From: Dan Carpenter + +[ Upstream commit 9020be114a47bf7ff33e179b3bb0016b91a098e6 ] + +The "mybuf" string comes from the user, so we need to ensure that it is NUL +terminated. + +Link: https://lore.kernel.org/r/20211214070527.GA27934@kili +Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") +Reviewed-by: James Smart +Signed-off-by: Dan Carpenter +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/lpfc/lpfc_debugfs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c +index bd6d459afce54..08b2e85dcd7d8 100644 +--- a/drivers/scsi/lpfc/lpfc_debugfs.c ++++ b/drivers/scsi/lpfc/lpfc_debugfs.c +@@ -2954,8 +2954,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, + char mybuf[64]; + char *pbuf; + +- if (nbytes > 64) +- nbytes = 64; ++ if (nbytes > 63) ++ nbytes = 63; + + memset(mybuf, 0, sizeof(mybuf)); + +-- +2.34.1 + diff --git a/queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch b/queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch new file mode 100644 index 00000000000..d9845e2145e --- /dev/null +++ b/queue-5.15/sctp-use-call_rcu-to-free-endpoint.patch @@ -0,0 +1,277 @@ +From 49d75b97300b0d80b0e3b05dc4b2ea4fa86fb965 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 13:04:30 -0500 +Subject: sctp: use call_rcu to free endpoint + +From: Xin Long + +[ Upstream commit 5ec7d18d1813a5bead0b495045606c93873aecbb ] + +This patch is to delay the endpoint free by calling call_rcu() to fix +another use-after-free issue in sctp_sock_dump(): + + BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 + Call Trace: + __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 + lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 + __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] + _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 + spin_lock_bh include/linux/spinlock.h:334 [inline] + __lock_sock+0x203/0x350 net/core/sock.c:2253 + lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 + lock_sock include/net/sock.h:1492 [inline] + sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 + sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 + sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 + __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 + 
inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 + netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 + __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 + netlink_dump_start include/linux/netlink.h:216 [inline] + inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 + __sock_diag_cmd net/core/sock_diag.c:232 [inline] + sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 + netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 + sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 + +This issue occurs when asoc is peeled off and the old sk is freed after +getting it by asoc->base.sk and before calling lock_sock(sk). + +To prevent the sk free, as a holder of the sk, ep should be alive when +calling lock_sock(). This patch uses call_rcu() and moves sock_put and +ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to +hold the ep under rcu_read_lock in sctp_transport_traverse_process(). + +If sctp_endpoint_hold() returns true, it means this ep is still alive +and we have held it and can continue to dump it; If it returns false, +it means this ep is dead and can be freed after rcu_read_unlock, and +we should skip it. + +In sctp_sock_dump(), after locking the sk, if this ep is different from +tsp->asoc->ep, it means during this dumping, this asoc was peeled off +before calling lock_sock(), and the sk should be skipped; If this ep is +the same with tsp->asoc->ep, it means no peeloff happens on this asoc, +and due to lock_sock, no peeloff will happen either until release_sock. + +Note that delaying endpoint free won't delay the port release, as the +port release happens in sctp_endpoint_destroy() before calling call_rcu(). +Also, freeing endpoint by call_rcu() makes it safe to access the sk by +asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). + +Thanks Jones to bring this issue up. + +v1->v2: + - improve the changelog. + - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. 
+ +Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com +Reported-by: Lee Jones +Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/sctp/sctp.h | 6 +++--- + include/net/sctp/structs.h | 3 ++- + net/sctp/diag.c | 12 ++++++------ + net/sctp/endpointola.c | 23 +++++++++++++++-------- + net/sctp/socket.c | 23 +++++++++++++++-------- + 5 files changed, 41 insertions(+), 26 deletions(-) + +diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h +index 189fdb9db1622..d314a180ab93d 100644 +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; + int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); + struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); + ++typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); + void sctp_transport_walk_start(struct rhashtable_iter *iter); + void sctp_transport_walk_stop(struct rhashtable_iter *iter); + struct sctp_transport *sctp_transport_get_next(struct net *net, +@@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p); +-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), +- int (*cb_done)(struct sctp_transport *, void *), +- struct net *net, int *pos, void *p); ++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, ++ struct net *net, int *pos, void *p); + int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); + int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info); +diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h +index 651bba654d77d..8d2c3dd9f5953 100644 +--- 
a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -1365,6 +1365,7 @@ struct sctp_endpoint { + + u32 secid; + u32 peer_secid; ++ struct rcu_head rcu; + }; + + /* Recover the outter endpoint structure. */ +@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) + struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); + void sctp_endpoint_free(struct sctp_endpoint *); + void sctp_endpoint_put(struct sctp_endpoint *); +-void sctp_endpoint_hold(struct sctp_endpoint *); ++int sctp_endpoint_hold(struct sctp_endpoint *ep); + void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); + struct sctp_association *sctp_endpoint_lookup_assoc( + const struct sctp_endpoint *ep, +diff --git a/net/sctp/diag.c b/net/sctp/diag.c +index 760b367644c12..a7d6231715013 100644 +--- a/net/sctp/diag.c ++++ b/net/sctp/diag.c +@@ -290,9 +290,8 @@ out: + return err; + } + +-static int sctp_sock_dump(struct sctp_transport *tsp, void *p) ++static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) + { +- struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct sk_buff *skb = commp->skb; +@@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) + int err = 0; + + lock_sock(sk); ++ if (ep != tsp->asoc->ep) ++ goto release; + list_for_each_entry(assoc, &ep->asocs, asocs) { + if (cb->args[4] < cb->args[1]) + goto next; +@@ -344,9 +345,8 @@ release: + return err; + } + +-static int sctp_sock_filter(struct sctp_transport *tsp, void *p) ++static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) + { +- struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + const struct inet_diag_req_v2 *r = commp->r; +@@ -505,8 +505,8 @@ skip: + if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) + goto done; + +- 
sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, +- net, &pos, &commp); ++ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, ++ net, &pos, &commp); + cb->args[2] = pos; + + done: +diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c +index 48c9c2c7602f7..efffde7f2328e 100644 +--- a/net/sctp/endpointola.c ++++ b/net/sctp/endpointola.c +@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) + } + + /* Final destructor for endpoint. */ ++static void sctp_endpoint_destroy_rcu(struct rcu_head *head) ++{ ++ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); ++ struct sock *sk = ep->base.sk; ++ ++ sctp_sk(sk)->ep = NULL; ++ sock_put(sk); ++ ++ kfree(ep); ++ SCTP_DBG_OBJCNT_DEC(ep); ++} ++ + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) + { + struct sock *sk; +@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); + +- sctp_sk(sk)->ep = NULL; +- /* Give up our hold on the sock */ +- sock_put(sk); +- +- kfree(ep); +- SCTP_DBG_OBJCNT_DEC(ep); ++ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); + } + + /* Hold a reference to an endpoint. 
*/ +-void sctp_endpoint_hold(struct sctp_endpoint *ep) ++int sctp_endpoint_hold(struct sctp_endpoint *ep) + { +- refcount_inc(&ep->base.refcnt); ++ return refcount_inc_not_zero(&ep->base.refcnt); + } + + /* Release a reference to an endpoint and clean up if there are +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index 6b937bfd47515..d2215d24634e8 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), + } + EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); + +-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), +- int (*cb_done)(struct sctp_transport *, void *), +- struct net *net, int *pos, void *p) { ++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, ++ struct net *net, int *pos, void *p) ++{ + struct rhashtable_iter hti; + struct sctp_transport *tsp; ++ struct sctp_endpoint *ep; + int ret; + + again: +@@ -5351,26 +5352,32 @@ again: + + tsp = sctp_transport_get_idx(net, &hti, *pos + 1); + for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { +- ret = cb(tsp, p); +- if (ret) +- break; ++ ep = tsp->asoc->ep; ++ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ ++ ret = cb(ep, tsp, p); ++ if (ret) ++ break; ++ sctp_endpoint_put(ep); ++ } + (*pos)++; + sctp_transport_put(tsp); + } + sctp_transport_walk_stop(&hti); + + if (ret) { +- if (cb_done && !cb_done(tsp, p)) { ++ if (cb_done && !cb_done(ep, tsp, p)) { + (*pos)++; ++ sctp_endpoint_put(ep); + sctp_transport_put(tsp); + goto again; + } ++ sctp_endpoint_put(ep); + sctp_transport_put(tsp); + } + + return ret; + } +-EXPORT_SYMBOL_GPL(sctp_for_each_transport); ++EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); + + /* 7.2.1 Association Status (SCTP_STATUS) + +-- +2.34.1 + diff --git a/queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch b/queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch new 
file mode 100644 index 00000000000..f63edcc2cef --- /dev/null +++ b/queue-5.15/selftests-calculate-udpgso-segment-count-without-hea.patch @@ -0,0 +1,70 @@ +From ef2ac44b8f3185802b7a3941e5e9c1a6111f6fa8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 22:24:41 +0000 +Subject: selftests: Calculate udpgso segment count without header adjustment + +From: Coco Li + +[ Upstream commit 5471d5226c3b39b3d2f7011c082d5715795bd65c ] + +The below referenced commit correctly updated the computation of number +of segments (gso_size) by using only the gso payload size and +removing the header lengths. + +With this change the regression test started failing. Update +the tests to match this new behavior. + +Both IPv4 and IPv6 tests are updated, as a separate patch in this series +will update udp_v6_send_skb to match this change in udp_send_skb. + +Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") +Signed-off-by: Coco Li +Reviewed-by: Willem de Bruijn +Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgso.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c +index c66da6ffd6d8d..7badaf215de28 100644 +--- a/tools/testing/selftests/net/udpgso.c ++++ b/tools/testing/selftests/net/udpgso.c +@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { + }, + { + /* send max number of min sized segments */ +- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, ++ .tlen = UDP_MAX_SEGMENTS, + .gso_len = 1, +- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, ++ .r_num_mss = UDP_MAX_SEGMENTS, + }, + { + /* send max number + 1 of min sized segments: fail */ +- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, ++ .tlen = UDP_MAX_SEGMENTS + 1, + .gso_len = 1, + .tfail = true, + }, +@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { + }, + 
{ + /* send max number of min sized segments */ +- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, ++ .tlen = UDP_MAX_SEGMENTS, + .gso_len = 1, +- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, ++ .r_num_mss = UDP_MAX_SEGMENTS, + }, + { + /* send max number + 1 of min sized segments: fail */ +- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, ++ .tlen = UDP_MAX_SEGMENTS + 1, + .gso_len = 1, + .tfail = true, + }, +-- +2.34.1 + diff --git a/queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch b/queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch new file mode 100644 index 00000000000..c31b4928995 --- /dev/null +++ b/queue-5.15/selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch @@ -0,0 +1,36 @@ +From e7ad5b2e0bdbde171280e4e45c73c3b9303e6888 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Dec 2021 15:27:30 +0800 +Subject: selftests: net: Fix a typo in udpgro_fwd.sh + +From: Jianguo Wu + +[ Upstream commit add25d6d6c85f7b6d00a055ee0a4169acf845681 ] + +$rvs -> $rcv + +Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") +Signed-off-by: Jianguo Wu +Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgro_fwd.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh +index 7f26591f236b9..6a3985b8cd7f6 100755 +--- a/tools/testing/selftests/net/udpgro_fwd.sh ++++ b/tools/testing/selftests/net/udpgro_fwd.sh +@@ -132,7 +132,7 @@ run_test() { + local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ + sed -e 's/\[//' -e 's/:.*//'` + if [ $rcv != $pkts ]; then +- echo " fail - received $rvs packets, expected $pkts" ++ echo " fail - received $rcv packets, expected $pkts" + ret=1 + return + fi +-- +2.34.1 + diff --git a/queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch 
b/queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch new file mode 100644 index 00000000000..8c234d85df8 --- /dev/null +++ b/queue-5.15/selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch @@ -0,0 +1,63 @@ +From f15cb0802fd81ce5167c38990348abadd17fd45c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Dec 2021 18:58:10 +0800 +Subject: selftests/net: udpgso_bench_tx: fix dst ip argument + +From: wujianguo + +[ Upstream commit 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 ] + +udpgso_bench_tx call setup_sockaddr() for dest address before +parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}", +then ${dst_port} will be ignored, and using default cfg_port 8000. + +This will cause test case "multiple GRO socks" failed in udpgro.sh. + +Setup sockaddr after parsing all arguments. + +Fixes: 3a687bef148d ("selftests: udp gso benchmark") +Signed-off-by: Jianguo Wu +Reviewed-by: Willem de Bruijn +Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c +index 17512a43885e7..f1fdaa2702913 100644 +--- a/tools/testing/selftests/net/udpgso_bench_tx.c ++++ b/tools/testing/selftests/net/udpgso_bench_tx.c +@@ -419,6 +419,7 @@ static void usage(const char *filepath) + + static void parse_opts(int argc, char **argv) + { ++ const char *bind_addr = NULL; + int max_len, hdrlen; + int c; + +@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': +- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); ++ bind_addr = optarg; + break; + case 'l': + cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; +@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) + } + } + ++ if 
(!bind_addr) ++ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; ++ ++ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); ++ + if (optind != argc) + usage(argv[0]); + +-- +2.34.1 + diff --git a/queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch b/queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch new file mode 100644 index 00000000000..b22e46ef8b8 --- /dev/null +++ b/queue-5.15/selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch @@ -0,0 +1,54 @@ +From 207cab2684cdb4af6c1387774f09a3effefd66b1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Dec 2021 18:40:29 +0800 +Subject: selftests: net: using ping6 for IPv6 in udpgro_fwd.sh + +From: Jianguo Wu + +[ Upstream commit 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa ] + +udpgro_fwd.sh output following message: + ping: 2001:db8:1::100: Address family for hostname not supported + +Using ping6 when pinging IPv6 addresses. + +Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") +Signed-off-by: Jianguo Wu +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/udpgro_fwd.sh | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh +index 6a3985b8cd7f6..3ea73013d9568 100755 +--- a/tools/testing/selftests/net/udpgro_fwd.sh ++++ b/tools/testing/selftests/net/udpgro_fwd.sh +@@ -185,6 +185,7 @@ for family in 4 6; do + IPT=iptables + SUFFIX=24 + VXDEV=vxlan ++ PING=ping + + if [ $family = 6 ]; then + BM_NET=$BM_NET_V6 +@@ -192,6 +193,7 @@ for family in 4 6; do + SUFFIX="64 nodad" + VXDEV=vxlan6 + IPT=ip6tables ++ PING="ping6" + fi + + echo "IPv$family" +@@ -237,7 +239,7 @@ for family in 4 6; do + + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel +- ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null ++ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST + cleanup + +-- +2.34.1 + diff --git a/queue-5.15/series b/queue-5.15/series index 28aaa51e725..74e4307a8ee 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -13,3 +13,38 @@ recordmcount.pl-fix-typo-in-s390-mcount-regex.patch powerpc-ptdump-fix-debug_wx-since-generic-ptdump-conversion.patch efi-move-efifb_setup_from_dmi-prototype-from-arch-headers.patch selinux-initialize-proto-variable-in-selinux_ip_postroute_compat.patch +scsi-lpfc-terminate-string-in-lpfc_debugfs_nvmeio_tr.patch +net-mlx5-dr-fix-null-vs-is_err-checking-in-dr_domain.patch +net-mlx5-fix-error-print-in-case-of-irq-request-fail.patch +net-mlx5-fix-sf-health-recovery-flow.patch +net-mlx5-fix-tc-max-supported-prio-for-nic-mode.patch +net-mlx5e-wrap-the-tx-reporter-dump-callback-to-extr.patch +net-mlx5e-fix-interoperability-between-xsk-and-icosq.patch +net-mlx5e-fix-icosq-recovery-flow-for-xsk.patch +net-mlx5e-use-tc-sample-stubs-instead-of-ifdefs-in-s.patch 
+net-mlx5e-delete-forward-rule-for-ct-or-sample-actio.patch +udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch +selftests-calculate-udpgso-segment-count-without-hea.patch +net-phy-fixed_phy-fix-null-vs-is_err-checking-in-__f.patch +sctp-use-call_rcu-to-free-endpoint.patch +net-smc-fix-using-of-uninitialized-completions.patch +net-usb-pegasus-do-not-drop-long-ethernet-frames.patch +net-ag71xx-fix-a-potential-double-free-in-error-hand.patch +net-lantiq_xrx200-fix-statistics-of-received-bytes.patch +nfc-st21nfca-fix-memory-leak-in-device-probe-and-rem.patch +net-smc-don-t-send-cdc-llc-message-if-link-not-ready.patch +net-smc-fix-kernel-panic-caused-by-race-of-smc_sock.patch +igc-do-not-enable-crosstimestamping-for-i225-v-model.patch +igc-fix-tx-timestamp-support-for-non-msi-x-platforms.patch +drm-amd-display-send-s0i2_rdy-in-stream_count-0-opti.patch +drm-amd-display-set-optimize_pwr_state-for-dcn31.patch +ionic-initialize-the-lif-dbid_inuse-bitmap.patch +net-mlx5e-fix-wrong-features-assignment-in-case-of-e.patch +net-bridge-mcast-add-and-enforce-query-interval-mini.patch +net-bridge-mcast-add-and-enforce-startup-query-inter.patch +selftests-net-udpgso_bench_tx-fix-dst-ip-argument.patch +selftests-net-fix-a-typo-in-udpgro_fwd.sh.patch +net-bridge-mcast-fix-br_multicast_ctx_vlan_global_di.patch +net-ncsi-check-for-error-return-from-call-to-nla_put.patch +selftests-net-using-ping6-for-ipv6-in-udpgro_fwd.sh.patch +fsl-fman-fix-missing-put_device-call-in-fman_port_pr.patch diff --git a/queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch b/queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch new file mode 100644 index 00000000000..5c7a7c9967f --- /dev/null +++ b/queue-5.15/udp-using-datalen-to-cap-ipv6-udp-max-gso-segments.patch @@ -0,0 +1,44 @@ +From 1587073fcd199f7fa580a8b8f1e4b5175ba690bf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 22:24:40 +0000 +Subject: udp: using datalen to cap ipv6 udp max gso segments + 
+From: Coco Li + +[ Upstream commit 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 ] + +The max number of UDP gso segments is intended to cap to +UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). + +skb->len contains network and transport header len here, we should use +only data len instead. + +This is the ipv6 counterpart to the below referenced commit, +which missed the ipv6 change + +Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") +Signed-off-by: Coco Li +Reviewed-by: Willem de Bruijn +Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv6/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 7bee95d8d2df0..8cd8c0bce0986 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, + kfree_skb(skb); + return -EINVAL; + } +- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { ++ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); + return -EINVAL; + } +-- +2.34.1 +