From: Greg Kroah-Hartman Date: Tue, 22 Apr 2025 12:32:54 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v6.1.135~44 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6c728e65b2dc69861701284869a1d0ac34946684;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: blk-cgroup-support-to-track-if-policy-is-online.patch blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch ext4-fix-timer-use-after-free-on-failed-mount.patch ipvs-properly-dereference-pe-in-ip_vs_add_service.patch mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch net-openvswitch-fix-race-on-port-output.patch openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch --- diff --git a/queue-5.10/blk-cgroup-support-to-track-if-policy-is-online.patch b/queue-5.10/blk-cgroup-support-to-track-if-policy-is-online.patch new file mode 100644 index 0000000000..b9631ea85c --- /dev/null +++ b/queue-5.10/blk-cgroup-support-to-track-if-policy-is-online.patch @@ -0,0 +1,120 @@ +From dfd6200a095440b663099d8d42f1efb0175a1ce3 Mon Sep 17 00:00:00 2001 +From: Yu Kuai +Date: Thu, 19 Jan 2023 19:03:49 +0800 +Subject: blk-cgroup: support to track if policy is online + +From: Yu Kuai + +commit dfd6200a095440b663099d8d42f1efb0175a1ce3 upstream. + +A new field 'online' is added to blkg_policy_data to fix following +2 problem: + +1) In blkcg_activate_policy(), if pd_alloc_fn() with 'GFP_NOWAIT' + failed, 'queue_lock' will be dropped and pd_alloc_fn() will try again + without 'GFP_NOWAIT'. In the meantime, remove cgroup can race with + it, and pd_offline_fn() will be called without pd_init_fn() and + pd_online_fn(). This way null-ptr-deference can be triggered. 
+ +2) In order to synchronize pd_free_fn() from blkg_free_workfn() and + blkcg_deactivate_policy(), 'list_del_init(&blkg->q_node)' will be + delayed to blkg_free_workfn(), hence pd_offline_fn() can be called + first in blkg_destroy(), and then blkcg_deactivate_policy() will + call it again, we must prevent it. + +The new field 'online' will be set after pd_online_fn() and will be +cleared after pd_offline_fn(), in the meantime pd_offline_fn() will only +be called if 'online' is set. + +Signed-off-by: Yu Kuai +Acked-by: Tejun Heo +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20230119110350.2287325-3-yukuai1@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Bin Lan +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-cgroup.c | 24 +++++++++++++++++------- + include/linux/blk-cgroup.h | 1 + + 2 files changed, 18 insertions(+), 7 deletions(-) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -191,6 +191,7 @@ static struct blkcg_gq *blkg_alloc(struc + blkg->pd[i] = pd; + pd->blkg = blkg; + pd->plid = i; ++ pd->online = false; + } + + return blkg; +@@ -288,8 +289,11 @@ static struct blkcg_gq *blkg_create(stru + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + +- if (blkg->pd[i] && pol->pd_online_fn) +- pol->pd_online_fn(blkg->pd[i]); ++ if (blkg->pd[i]) { ++ if (pol->pd_online_fn) ++ pol->pd_online_fn(blkg->pd[i]); ++ blkg->pd[i]->online = true; ++ } + } + } + blkg->online = true; +@@ -389,8 +393,11 @@ static void blkg_destroy(struct blkcg_gq + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + +- if (blkg->pd[i] && pol->pd_offline_fn) +- pol->pd_offline_fn(blkg->pd[i]); ++ if (blkg->pd[i] && blkg->pd[i]->online) { ++ if (pol->pd_offline_fn) ++ pol->pd_offline_fn(blkg->pd[i]); ++ blkg->pd[i]->online = false; ++ } + } + + blkg->online = false; +@@ -1364,6 +1371,7 @@ retry: + blkg->pd[pol->plid] = pd; + pd->blkg = blkg; + pd->plid = pol->plid; 
++ pd->online = false; + } + + /* all allocated, init in the same order */ +@@ -1371,9 +1379,11 @@ retry: + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + pol->pd_init_fn(blkg->pd[pol->plid]); + +- if (pol->pd_online_fn) +- list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) ++ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { ++ if (pol->pd_online_fn) + pol->pd_online_fn(blkg->pd[pol->plid]); ++ blkg->pd[pol->plid]->online = true; ++ } + + __set_bit(pol->plid, q->blkcg_pols); + ret = 0; +@@ -1435,7 +1445,7 @@ void blkcg_deactivate_policy(struct requ + + spin_lock(&blkcg->lock); + if (blkg->pd[pol->plid]) { +- if (pol->pd_offline_fn) ++ if (blkg->pd[pol->plid]->online && pol->pd_offline_fn) + pol->pd_offline_fn(blkg->pd[pol->plid]); + pol->pd_free_fn(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; +--- a/include/linux/blk-cgroup.h ++++ b/include/linux/blk-cgroup.h +@@ -87,6 +87,7 @@ struct blkg_policy_data { + /* the blkg and policy id this per-policy data belongs to */ + struct blkcg_gq *blkg; + int plid; ++ bool online; + }; + + /* diff --git a/queue-5.10/blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch b/queue-5.10/blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch new file mode 100644 index 0000000000..8c5ea2b4b5 --- /dev/null +++ b/queue-5.10/blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch @@ -0,0 +1,59 @@ +From 01bc4fda9ea0a6b52f12326486f07a4910666cf6 Mon Sep 17 00:00:00 2001 +From: Li Nan +Date: Fri, 19 Apr 2024 17:32:57 +0800 +Subject: blk-iocost: do not WARN if iocg was already offlined + +From: Li Nan + +commit 01bc4fda9ea0a6b52f12326486f07a4910666cf6 upstream. + +In iocg_pay_debt(), warn is triggered if 'active_list' is empty, which +is intended to confirm iocg is active when it has debt. 
However, warn +can be triggered during a blkcg or disk removal, if iocg_waitq_timer_fn() +is run at that time: + + WARNING: CPU: 0 PID: 2344971 at block/blk-iocost.c:1402 iocg_pay_debt+0x14c/0x190 + Call trace: + iocg_pay_debt+0x14c/0x190 + iocg_kick_waitq+0x438/0x4c0 + iocg_waitq_timer_fn+0xd8/0x130 + __run_hrtimer+0x144/0x45c + __hrtimer_run_queues+0x16c/0x244 + hrtimer_interrupt+0x2cc/0x7b0 + +The warn in this situation is meaningless. Since this iocg is being +removed, the state of the 'active_list' is irrelevant, and 'waitq_timer' +is canceled after removing 'active_list' in ioc_pd_free(), which ensures +iocg is freed after iocg_waitq_timer_fn() returns. + +Therefore, add the check if iocg was already offlined to avoid warn +when removing a blkcg or disk. + +Signed-off-by: Li Nan +Reviewed-by: Yu Kuai +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20240419093257.3004211-1-linan666@huaweicloud.com +Signed-off-by: Jens Axboe +Signed-off-by: Bin Lan +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-iocost.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -1389,8 +1389,11 @@ static void iocg_pay_debt(struct ioc_gq + lockdep_assert_held(&iocg->ioc->lock); + lockdep_assert_held(&iocg->waitq.lock); + +- /* make sure that nobody messed with @iocg */ +- WARN_ON_ONCE(list_empty(&iocg->active_list)); ++ /* ++ * make sure that nobody messed with @iocg. Check iocg->pd.online ++ * to avoid warn when removing blkcg or disk. 
++ */ ++ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online); + WARN_ON_ONCE(iocg->inuse > 1); + + iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt); diff --git a/queue-5.10/ext4-fix-timer-use-after-free-on-failed-mount.patch b/queue-5.10/ext4-fix-timer-use-after-free-on-failed-mount.patch new file mode 100644 index 0000000000..e26ec88d1b --- /dev/null +++ b/queue-5.10/ext4-fix-timer-use-after-free-on-failed-mount.patch @@ -0,0 +1,49 @@ +From 0ce160c5bdb67081a62293028dc85758a8efb22a Mon Sep 17 00:00:00 2001 +From: Xiaxi Shen +Date: Sun, 14 Jul 2024 21:33:36 -0700 +Subject: ext4: fix timer use-after-free on failed mount + +From: Xiaxi Shen + +commit 0ce160c5bdb67081a62293028dc85758a8efb22a upstream. + +Syzbot has found an ODEBUG bug in ext4_fill_super + +The del_timer_sync function cancels the s_err_report timer, +which reminds about filesystem errors daily. We should +guarantee the timer is no longer active before kfree(sbi). + +When filesystem mounting fails, the flow goes to failed_mount3, +where an error occurs when ext4_stop_mmpd is called, causing +a read I/O failure. This triggers the ext4_handle_error function +that ultimately re-arms the timer, +leaving the s_err_report timer active before kfree(sbi) is called. + +Fix the issue by canceling the s_err_report timer after calling ext4_stop_mmpd. 
+ +Signed-off-by: Xiaxi Shen +Reported-and-tested-by: syzbot+59e0101c430934bc9a36@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=59e0101c430934bc9a36 +Link: https://patch.msgid.link/20240715043336.98097-1-shenxiaxi26@gmail.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +[Minor context change fixed] +Signed-off-by: Xiangyu Chen +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5185,8 +5185,8 @@ failed_mount_wq: + failed_mount3a: + ext4_es_unregister_shrinker(sbi); + failed_mount3: +- del_timer_sync(&sbi->s_err_report); + ext4_stop_mmpd(sbi); ++ del_timer_sync(&sbi->s_err_report); + failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); diff --git a/queue-5.10/ipvs-properly-dereference-pe-in-ip_vs_add_service.patch b/queue-5.10/ipvs-properly-dereference-pe-in-ip_vs_add_service.patch new file mode 100644 index 0000000000..80884c5a8a --- /dev/null +++ b/queue-5.10/ipvs-properly-dereference-pe-in-ip_vs_add_service.patch @@ -0,0 +1,53 @@ +From cbd070a4ae62f119058973f6d2c984e325bce6e7 Mon Sep 17 00:00:00 2001 +From: Chen Hanxiao +Date: Thu, 27 Jun 2024 14:15:15 +0800 +Subject: ipvs: properly dereference pe in ip_vs_add_service + +From: Chen Hanxiao + +commit cbd070a4ae62f119058973f6d2c984e325bce6e7 upstream. 
+ +Use pe directly to resolve sparse warning: + + net/netfilter/ipvs/ip_vs_ctl.c:1471:27: warning: dereference of noderef expression + +Fixes: 39b972231536 ("ipvs: handle connections started by real-servers") +Signed-off-by: Chen Hanxiao +Acked-by: Julian Anastasov +Acked-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Cliff Liu +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/ipvs/ip_vs_ctl.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -1384,20 +1384,20 @@ ip_vs_add_service(struct netns_ipvs *ipv + sched = NULL; + } + +- /* Bind the ct retriever */ +- RCU_INIT_POINTER(svc->pe, pe); +- pe = NULL; +- + /* Update the virtual service counters */ + if (svc->port == FTPPORT) + atomic_inc(&ipvs->ftpsvc_counter); + else if (svc->port == 0) + atomic_inc(&ipvs->nullsvc_counter); +- if (svc->pe && svc->pe->conn_out) ++ if (pe && pe->conn_out) + atomic_inc(&ipvs->conn_out_counter); + + ip_vs_start_estimator(ipvs, &svc->stats); + ++ /* Bind the ct retriever */ ++ RCU_INIT_POINTER(svc->pe, pe); ++ pe = NULL; ++ + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ipvs->num_services++; diff --git a/queue-5.10/mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch b/queue-5.10/mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch new file mode 100644 index 0000000000..6a01f817f9 --- /dev/null +++ b/queue-5.10/mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch @@ -0,0 +1,142 @@ +From 97904a59855c7ac7c613085bc6bdc550d48524ff Mon Sep 17 00:00:00 2001 +From: Kamal Dasu +Date: Fri, 20 May 2022 14:31:08 -0400 +Subject: mmc: sdhci-brcmstb: Add ability to increase max clock rate for 72116b0 + +From: Kamal Dasu + +commit 97904a59855c7ac7c613085bc6bdc550d48524ff upstream. 
+ +The 72116B0 has improved SDIO controllers that allow the max clock +rate to be increased from a max of 100MHz to a max of 150MHz. The +driver will need to get the clock and increase it's default rate +and override the caps register, that still indicates a max of 100MHz. +The new clock will be named "sdio_freq" in the DT node's "clock-names" +list. The driver will use a DT property, "clock-frequency", to +enable this functionality and will get the actual rate in MHz +from the property to allow various speeds to be requested. + +Signed-off-by: Al Cooper +Signed-off-by: Kamal Dasu +Acked-by: Florian Fainelli +Link: https://lore.kernel.org/r/20220520183108.47358-3-kdasu.kdev@gmail.com +Signed-off-by: Ulf Hansson +Signed-off-by: Kamal Dasu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-brcmstb.c | 69 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 68 insertions(+), 1 deletion(-) + +--- a/drivers/mmc/host/sdhci-brcmstb.c ++++ b/drivers/mmc/host/sdhci-brcmstb.c +@@ -32,6 +32,8 @@ + struct sdhci_brcmstb_priv { + void __iomem *cfg_regs; + unsigned int flags; ++ struct clk *base_clk; ++ u32 base_freq_hz; + }; + + struct brcmstb_match_priv { +@@ -251,9 +253,11 @@ static int sdhci_brcmstb_probe(struct pl + struct sdhci_pltfm_host *pltfm_host; + const struct of_device_id *match; + struct sdhci_brcmstb_priv *priv; ++ u32 actual_clock_mhz; + struct sdhci_host *host; + struct resource *iomem; + struct clk *clk; ++ struct clk *base_clk; + int res; + + match = of_match_node(sdhci_brcm_of_match, pdev->dev.of_node); +@@ -331,6 +335,35 @@ static int sdhci_brcmstb_probe(struct pl + if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + ++ /* Change the base clock frequency if the DT property exists */ ++ if (device_property_read_u32(&pdev->dev, "clock-frequency", ++ &priv->base_freq_hz) != 0) ++ goto add_host; ++ ++ base_clk = devm_clk_get_optional(&pdev->dev, "sdio_freq"); ++ if (IS_ERR(base_clk)) { 
++ dev_warn(&pdev->dev, "Clock for \"sdio_freq\" not found\n"); ++ goto add_host; ++ } ++ ++ res = clk_prepare_enable(base_clk); ++ if (res) ++ goto err; ++ ++ /* set improved clock rate */ ++ clk_set_rate(base_clk, priv->base_freq_hz); ++ actual_clock_mhz = clk_get_rate(base_clk) / 1000000; ++ ++ host->caps &= ~SDHCI_CLOCK_V3_BASE_MASK; ++ host->caps |= (actual_clock_mhz << SDHCI_CLOCK_BASE_SHIFT); ++ /* Disable presets because they are now incorrect */ ++ host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN; ++ ++ dev_dbg(&pdev->dev, "Base Clock Frequency changed to %dMHz\n", ++ actual_clock_mhz); ++ priv->base_clk = base_clk; ++ ++add_host: + res = sdhci_brcmstb_add_host(host, priv); + if (res) + goto err; +@@ -341,6 +374,7 @@ static int sdhci_brcmstb_probe(struct pl + err: + sdhci_pltfm_free(pdev); + err_clk: ++ clk_disable_unprepare(base_clk); + clk_disable_unprepare(clk); + return res; + } +@@ -352,11 +386,44 @@ static void sdhci_brcmstb_shutdown(struc + + MODULE_DEVICE_TABLE(of, sdhci_brcm_of_match); + ++#ifdef CONFIG_PM_SLEEP ++static int sdhci_brcmstb_suspend(struct device *dev) ++{ ++ struct sdhci_host *host = dev_get_drvdata(dev); ++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); ++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host); ++ ++ clk_disable_unprepare(priv->base_clk); ++ return sdhci_pltfm_suspend(dev); ++} ++ ++static int sdhci_brcmstb_resume(struct device *dev) ++{ ++ struct sdhci_host *host = dev_get_drvdata(dev); ++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); ++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host); ++ int ret; ++ ++ ret = sdhci_pltfm_resume(dev); ++ if (!ret && priv->base_freq_hz) { ++ ret = clk_prepare_enable(priv->base_clk); ++ if (!ret) ++ ret = clk_set_rate(priv->base_clk, priv->base_freq_hz); ++ } ++ ++ return ret; ++} ++#endif ++ ++static const struct dev_pm_ops sdhci_brcmstb_pmops = { ++ SET_SYSTEM_SLEEP_PM_OPS(sdhci_brcmstb_suspend, sdhci_brcmstb_resume) ++}; ++ + static 
struct platform_driver sdhci_brcmstb_driver = { + .driver = { + .name = "sdhci-brcmstb", + .probe_type = PROBE_PREFER_ASYNCHRONOUS, +- .pm = &sdhci_pltfm_pmops, ++ .pm = &sdhci_brcmstb_pmops, + .of_match_table = of_match_ptr(sdhci_brcm_of_match), + }, + .probe = sdhci_brcmstb_probe, diff --git a/queue-5.10/mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch b/queue-5.10/mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch new file mode 100644 index 0000000000..c57333059e --- /dev/null +++ b/queue-5.10/mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch @@ -0,0 +1,46 @@ +From c3c0ed75ffbff5c70667030b5139bbb75b0a30f5 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Wed, 8 Jun 2022 08:27:57 -0700 +Subject: mmc: sdhci-brcmstb: Initialize base_clk to NULL in sdhci_brcmstb_probe() + +From: Nathan Chancellor + +commit c3c0ed75ffbff5c70667030b5139bbb75b0a30f5 upstream. + +Clang warns a few times along the lines of: + + drivers/mmc/host/sdhci-brcmstb.c:302:6: warning: variable 'base_clk' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] + if (res) + ^~~ + drivers/mmc/host/sdhci-brcmstb.c:376:24: note: uninitialized use occurs here + clk_disable_unprepare(base_clk); + ^~~~~~~~ + +base_clk is used in the error path before it is initialized. Initialize +it to NULL, as clk_disable_unprepare() calls clk_disable() and +clk_unprepare(), which both handle NULL pointers gracefully. 
+ +Link: https://github.com/ClangBuiltLinux/linux/issues/1650 +Reported-by: kernel test robot +Signed-off-by: Nathan Chancellor +Acked-by: Florian Fainelli +Acked-by: Adrian Hunter +Link: https://lore.kernel.org/r/20220608152757.82529-1-nathan@kernel.org +Signed-off-by: Ulf Hansson +Signed-off-by: Kamal Dasu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-brcmstb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mmc/host/sdhci-brcmstb.c ++++ b/drivers/mmc/host/sdhci-brcmstb.c +@@ -257,7 +257,7 @@ static int sdhci_brcmstb_probe(struct pl + struct sdhci_host *host; + struct resource *iomem; + struct clk *clk; +- struct clk *base_clk; ++ struct clk *base_clk = NULL; + int res; + + match = of_match_node(sdhci_brcm_of_match, pdev->dev.of_node); diff --git a/queue-5.10/mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch b/queue-5.10/mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch new file mode 100644 index 0000000000..6df89a04c6 --- /dev/null +++ b/queue-5.10/mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch @@ -0,0 +1,43 @@ +From 886201c70a1cab34ef96f867c2b2dd6379ffa7b9 Mon Sep 17 00:00:00 2001 +From: Kamal Dasu +Date: Thu, 14 Jul 2022 13:41:32 -0400 +Subject: mmc: sdhci-brcmstb: use clk_get_rate(base_clk) in PM resume + +From: Kamal Dasu + +commit 886201c70a1cab34ef96f867c2b2dd6379ffa7b9 upstream. + +Use clk_get_rate for base_clk on resume before setting new rate. +This change ensures that the clock api returns current rate +and sets the clock to the desired rate and honors CLK_GET_NO_CACHE +attribute used by clock api. 
+ +Fixes: 97904a59855c (mmc: sdhci-brcmstb: Add ability to increase max clock rate for 72116b0) +Signed-off-by: Kamal Dasu +Acked-by: Florian Fainelli +Link: https://lore.kernel.org/r/20220714174132.18541-1-kdasu.kdev@gmail.com +Signed-off-by: Ulf Hansson +Signed-off-by: Kamal Dasu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-brcmstb.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/mmc/host/sdhci-brcmstb.c ++++ b/drivers/mmc/host/sdhci-brcmstb.c +@@ -407,7 +407,14 @@ static int sdhci_brcmstb_resume(struct d + ret = sdhci_pltfm_resume(dev); + if (!ret && priv->base_freq_hz) { + ret = clk_prepare_enable(priv->base_clk); +- if (!ret) ++ /* ++ * Note: using clk_get_rate() below as clk_get_rate() ++ * honors CLK_GET_RATE_NOCACHE attribute, but clk_set_rate() ++ * may do implicit get_rate() calls that do not honor ++ * CLK_GET_RATE_NOCACHE. ++ */ ++ if (!ret && ++ (clk_get_rate(priv->base_clk) != priv->base_freq_hz)) + ret = clk_set_rate(priv->base_clk, priv->base_freq_hz); + } + diff --git a/queue-5.10/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch b/queue-5.10/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch new file mode 100644 index 0000000000..8187f0479b --- /dev/null +++ b/queue-5.10/net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch @@ -0,0 +1,278 @@ +From fb1a3132ee1ac968316e45d21a48703a6db0b6c3 Mon Sep 17 00:00:00 2001 +From: Vlad Buslov +Date: Mon, 31 May 2021 16:28:39 +0300 +Subject: net/mlx5e: Fix use-after-free of encap entry in neigh update handler + +From: Vlad Buslov + +commit fb1a3132ee1ac968316e45d21a48703a6db0b6c3 upstream. 
+ +Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock +removal from TC filter update path and properly handle concurrent encap +entry insertion/deletion which can lead to following use-after-free: + + [23827.464923] ================================================================== + [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core] + [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635 + [23827.472251] + [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5 + [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] + [23827.476731] Call Trace: + [23827.477260] dump_stack+0xbb/0x107 + [23827.477906] print_address_description.constprop.0+0x18/0x140 + [23827.478896] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] + [23827.479879] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] + [23827.480905] kasan_report.cold+0x7c/0xd8 + [23827.481701] ? mlx5e_encap_take+0x72/0x140 [mlx5_core] + [23827.482744] kasan_check_range+0x145/0x1a0 + [23827.493112] mlx5e_encap_take+0x72/0x140 [mlx5_core] + [23827.494054] ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core] + [23827.495296] mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core] + [23827.496338] ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core] + [23827.497486] ? read_word_at_a_time+0xe/0x20 + [23827.498250] ? strscpy+0xa0/0x2a0 + [23827.498889] process_one_work+0x8ac/0x14e0 + [23827.499638] ? lockdep_hardirqs_on_prepare+0x400/0x400 + [23827.500537] ? pwq_dec_nr_in_flight+0x2c0/0x2c0 + [23827.501359] ? rwlock_bug.part.0+0x90/0x90 + [23827.502116] worker_thread+0x53b/0x1220 + [23827.502831] ? process_one_work+0x14e0/0x14e0 + [23827.503627] kthread+0x328/0x3f0 + [23827.504254] ? _raw_spin_unlock_irq+0x24/0x40 + [23827.505065] ? 
__kthread_bind_mask+0x90/0x90 + [23827.505912] ret_from_fork+0x1f/0x30 + [23827.506621] + [23827.506987] Allocated by task 28248: + [23827.507694] kasan_save_stack+0x1b/0x40 + [23827.508476] __kasan_kmalloc+0x7c/0x90 + [23827.509197] mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core] + [23827.510194] mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core] + [23827.511218] __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core] + [23827.512234] mlx5e_configure_flower+0x191c/0x4870 [mlx5_core] + [23827.513298] tc_setup_cb_add+0x1d5/0x420 + [23827.514023] fl_hw_replace_filter+0x382/0x6a0 [cls_flower] + [23827.514975] fl_change+0x2ceb/0x4a51 [cls_flower] + [23827.515821] tc_new_tfilter+0x89a/0x2070 + [23827.516548] rtnetlink_rcv_msg+0x644/0x8c0 + [23827.517300] netlink_rcv_skb+0x11d/0x340 + [23827.518021] netlink_unicast+0x42b/0x700 + [23827.518742] netlink_sendmsg+0x743/0xc20 + [23827.519467] sock_sendmsg+0xb2/0xe0 + [23827.520131] ____sys_sendmsg+0x590/0x770 + [23827.520851] ___sys_sendmsg+0xd8/0x160 + [23827.521552] __sys_sendmsg+0xb7/0x140 + [23827.522238] do_syscall_64+0x3a/0x70 + [23827.522907] entry_SYSCALL_64_after_hwframe+0x44/0xae + [23827.523797] + [23827.524163] Freed by task 25948: + [23827.524780] kasan_save_stack+0x1b/0x40 + [23827.525488] kasan_set_track+0x1c/0x30 + [23827.526187] kasan_set_free_info+0x20/0x30 + [23827.526968] __kasan_slab_free+0xed/0x130 + [23827.527709] slab_free_freelist_hook+0xcf/0x1d0 + [23827.528528] kmem_cache_free_bulk+0x33a/0x6e0 + [23827.529317] kfree_rcu_work+0x55f/0xb70 + [23827.530024] process_one_work+0x8ac/0x14e0 + [23827.530770] worker_thread+0x53b/0x1220 + [23827.531480] kthread+0x328/0x3f0 + [23827.532114] ret_from_fork+0x1f/0x30 + [23827.532785] + [23827.533147] Last potentially related work creation: + [23827.534007] kasan_save_stack+0x1b/0x40 + [23827.534710] kasan_record_aux_stack+0xab/0xc0 + [23827.535492] kvfree_call_rcu+0x31/0x7b0 + [23827.536206] mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core] + [23827.537305] mlx5e_flow_put+0x49/0x80 
[mlx5_core] + [23827.538290] mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core] + [23827.539300] tc_setup_cb_destroy+0x18e/0x2f0 + [23827.540144] fl_hw_destroy_filter+0x1d2/0x310 [cls_flower] + [23827.541148] __fl_delete+0x4dc/0x660 [cls_flower] + [23827.541985] fl_delete+0x97/0x160 [cls_flower] + [23827.542782] tc_del_tfilter+0x7ab/0x13d0 + [23827.543503] rtnetlink_rcv_msg+0x644/0x8c0 + [23827.544257] netlink_rcv_skb+0x11d/0x340 + [23827.544981] netlink_unicast+0x42b/0x700 + [23827.545700] netlink_sendmsg+0x743/0xc20 + [23827.546424] sock_sendmsg+0xb2/0xe0 + [23827.547084] ____sys_sendmsg+0x590/0x770 + [23827.547850] ___sys_sendmsg+0xd8/0x160 + [23827.548606] __sys_sendmsg+0xb7/0x140 + [23827.549303] do_syscall_64+0x3a/0x70 + [23827.549969] entry_SYSCALL_64_after_hwframe+0x44/0xae + [23827.550853] + [23827.551217] The buggy address belongs to the object at ffff8881d1322200 + [23827.551217] which belongs to the cache kmalloc-256 of size 256 + [23827.553341] The buggy address is located 140 bytes inside of + [23827.553341] 256-byte region [ffff8881d1322200, ffff8881d1322300) + [23827.555747] The buggy address belongs to the page: + [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320 + [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0 + [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) + [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40 + [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 + [23827.564112] page dumped because: kasan: bad access detected + [23827.565439] + [23827.565932] Memory state around the buggy address: + [23827.566917] ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + [23827.568485] ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + 
[23827.571143] ^ + [23827.571879] ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + [23827.573283] ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + [23827.574654] ================================================================== + +Most of the necessary logic is already correctly implemented by +mlx5e_get_next_valid_encap() helper that is used in neigh stats update +handler. Make the handler generic by renaming it to +mlx5e_get_next_matching_encap() and use callback to test whether flow is +matching instead of hardcoded check for 'valid' flag value. Implement +mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap() +with callback that tests encap MLX5_ENCAP_ENTRY_VALID flag. Implement new +mlx5e_get_next_init_encap() helper by calling +mlx5e_get_next_matching_encap() with callback that tests encap completion +result to be non-error and use it in mlx5e_rep_neigh_update() to safely +iterate over nhe->encap_list. + +Remove encap completion logic from mlx5e_rep_update_flows() since the encap +entries passed to this function are already guaranteed to be properly +initialized by similar code in mlx5e_get_next_init_encap(). 
+ +Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution") +Signed-off-by: Vlad Buslov +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +[ since kernel 5.10 doesn't have commit 0d9f96471493 + ("net/mlx5e: Extract tc tunnel encap/decap code to dedicated file") + which moved encap/decap from en_tc.c to tc_tun_encap.c, so backport and + move the additional functions to en_tc.c instead of tc_tun_encap.c ] +Signed-off-by: Xiangyu Chen +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 17 +++----- + drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 6 --- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++-- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 3 + + 4 files changed, 41 insertions(+), 18 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +@@ -129,9 +129,8 @@ static void mlx5e_rep_neigh_update(struc + work); + struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; + struct neighbour *n = update_work->n; +- struct mlx5e_encap_entry *e; ++ struct mlx5e_encap_entry *e = NULL; + unsigned char ha[ETH_ALEN]; +- struct mlx5e_priv *priv; + bool neigh_connected; + u8 nud_state, dead; + +@@ -152,14 +151,12 @@ static void mlx5e_rep_neigh_update(struc + + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + +- list_for_each_entry(e, &nhe->encap_list, encap_list) { +- if (!mlx5e_encap_take(e)) +- continue; +- +- priv = netdev_priv(e->out_dev); +- mlx5e_rep_update_flows(priv, e, neigh_connected, ha); +- mlx5e_encap_put(priv, e); +- } ++ /* mlx5e_get_next_init_encap() releases previous encap before returning ++ * the next one. 
++ */ ++ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) ++ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); ++ + rtnl_unlock(); + mlx5e_release_neigh_update_work(update_work); + } +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +@@ -91,13 +91,9 @@ void mlx5e_rep_update_flows(struct mlx5e + + ASSERT_RTNL(); + +- /* wait for encap to be fully initialized */ +- wait_for_completion(&e->res_ready); +- + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); +- if (e->compl_result < 0 || (encap_connected == neigh_connected && +- ether_addr_equal(e->h_dest, ha))) ++ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1653,9 +1653,12 @@ void mlx5e_put_encap_flow_list(struct ml + mlx5e_flow_put(priv, flow); + } + ++typedef bool (match_cb)(struct mlx5e_encap_entry *); ++ + static struct mlx5e_encap_entry * +-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, +- struct mlx5e_encap_entry *e) ++mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, ++ struct mlx5e_encap_entry *e, ++ match_cb match) + { + struct mlx5e_encap_entry *next = NULL; + +@@ -1690,7 +1693,7 @@ retry: + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ +- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { ++ if (!match(next)) { + e = next; + goto retry; + } +@@ -1698,6 +1701,30 @@ retry: + return next; + } + ++static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) ++{ ++ return e->flags & MLX5_ENCAP_ENTRY_VALID; ++} ++ ++static struct mlx5e_encap_entry * ++mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, ++ 
struct mlx5e_encap_entry *e) ++{ ++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); ++} ++ ++static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) ++{ ++ return e->compl_result >= 0; ++} ++ ++struct mlx5e_encap_entry * ++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, ++ struct mlx5e_encap_entry *e) ++{ ++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); ++} ++ + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) + { + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +@@ -161,6 +161,9 @@ void mlx5e_take_all_encap_flows(struct m + void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); + + struct mlx5e_neigh_hash_entry; ++struct mlx5e_encap_entry * ++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, ++ struct mlx5e_encap_entry *e); + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + + void mlx5e_tc_reoffload_flows_work(struct work_struct *work); diff --git a/queue-5.10/net-openvswitch-fix-race-on-port-output.patch b/queue-5.10/net-openvswitch-fix-race-on-port-output.patch new file mode 100644 index 0000000000..dad11d50a1 --- /dev/null +++ b/queue-5.10/net-openvswitch-fix-race-on-port-output.patch @@ -0,0 +1,235 @@ +From 066b86787fa3d97b7aefb5ac0a99a22dad2d15f8 Mon Sep 17 00:00:00 2001 +From: Felix Huettner +Date: Wed, 5 Apr 2023 07:53:41 +0000 +Subject: net: openvswitch: fix race on port output + +From: Felix Huettner + +commit 066b86787fa3d97b7aefb5ac0a99a22dad2d15f8 upstream. + +assume the following setup on a single machine: +1. An openvswitch instance with one bridge and default flows +2. two network namespaces "server" and "client" +3. two ovs interfaces "server" and "client" on the bridge +4. for each ovs interface a veth pair with a matching name and 32 rx and + tx queues +5. 
move the ends of the veth pairs to the respective network namespaces +6. assign ip addresses to each of the veth ends in the namespaces (needs + to be the same subnet) +7. start some http server on the server network namespace +8. test if a client in the client namespace can reach the http server + +when following the actions below the host has a chance of getting a cpu +stuck in an infinite loop: +1. send a large amount of parallel requests to the http server (around + 3000 curls should work) +2. in parallel delete the network namespace (do not delete interfaces or + stop the server, just kill the namespace) + +there is a low chance that this will cause the below kernel cpu stuck +message. If this does not happen just retry. +Below there is also the output of bpftrace for the functions mentioned +in the output. + +The series of events happening here is: +1. the network namespace is deleted calling + `unregister_netdevice_many_notify` somewhere in the process +2. this sets first `NETREG_UNREGISTERING` on both ends of the veth and + then runs `synchronize_net` +3. it then calls `call_netdevice_notifiers` with `NETDEV_UNREGISTER` +4. this is then handled by `dp_device_event` which calls + `ovs_netdev_detach_dev` (if a vport is found, which is the case for + the veth interface attached to ovs) +5. this removes the rx_handlers of the device but does not prevent + packets from being sent to the device +6. `dp_device_event` then queues the vport deletion to work in + background as an ovs_lock is needed that we do not hold in the + unregistration path +7. `unregister_netdevice_many_notify` continues to call + `netdev_unregister_kobject` which sets `real_num_tx_queues` to 0 +8. port deletion continues (but details are not relevant for this issue) +9. at some future point the background task deletes the vport + +If after 7. but before 9.
a packet is sent to the ovs vport (which is +not deleted at this point in time), the vport forwards it to the +`dev_queue_xmit` flow even though the device is unregistering. +In `skb_tx_hash` (which is called in the `dev_queue_xmit` path) there is +a while loop (if the packet has a rx_queue recorded) that is infinite if +`dev->real_num_tx_queues` is zero. + +To prevent this from happening we update `do_output` to handle devices +without carrier the same as if the device is not found (which would +be the code path after 9. is done). + +Additionally we now produce a warning in `skb_tx_hash` if we will hit +the infinite loop. + +bpftrace (first word is function name): + +__dev_queue_xmit server: real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1 +netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1 +dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 2, reg_state: 1 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 6, reg_state: 2 +ovs_netdev_detach_dev server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, reg_state: 2 +netdev_rx_handler_unregister server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +netdev_rx_handler_unregister ret server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2 +dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 27, reg_state: 2 +dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 22, reg_state: 2 +dp_device_event server: real_num_tx_queues: 1
cpu 9, pid: 21024, tid: 21024, event 18, reg_state: 2 +netdev_unregister_kobject: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024 +synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024 +ovs_vport_send server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 +__dev_queue_xmit server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 +netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2 +broken device server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024 +ovs_dp_detach_port server: real_num_tx_queues: 0 cpu 9, pid: 9124, tid: 9124, reg_state: 2 +synchronize_rcu_expedited: cpu 9, pid: 33604, tid: 33604 + +stuck message: + +watchdog: BUG: soft lockup - CPU#5 stuck for 26s! [curl:1929279] +Modules linked in: veth pktgen bridge stp llc ip_set_hash_net nft_counter xt_set nft_compat nf_tables ip_set_hash_ip ip_set nfnetlink_cttimeout nfnetlink openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 tls binfmt_misc nls_iso8859_1 input_leds joydev serio_raw dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua sch_fq_codel drm efi_pstore virtio_rng ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear hid_generic usbhid hid crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net ahci net_failover crypto_simd cryptd psmouse libahci virtio_blk failover +CPU: 5 PID: 1929279 Comm: curl Not tainted 5.15.0-67-generic #74-Ubuntu +Hardware name: OpenStack Foundation OpenStack Nova, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +RIP: 0010:netdev_pick_tx+0xf1/0x320 +Code: 00 00 8d 48 ff 0f b7 c1 66 39 ca 0f 86 e9 01 00 00 45 0f b7 ff 41 39 c7 0f 87 5b 01 00 00 44 29 f8 41 39 c7 0f 87 4f 01 00 
00 f2 0f 1f 44 00 00 49 8b 94 24 28 04 00 00 48 85 d2 0f 84 53 01 +RSP: 0018:ffffb78b40298820 EFLAGS: 00000246 +RAX: 0000000000000000 RBX: ffff9c8773adc2e0 RCX: 000000000000083f +RDX: 0000000000000000 RSI: ffff9c8773adc2e0 RDI: ffff9c870a25e000 +RBP: ffffb78b40298858 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: ffff9c870a25e000 +R13: ffff9c870a25e000 R14: ffff9c87fe043480 R15: 0000000000000000 +FS: 00007f7b80008f00(0000) GS:ffff9c8e5f740000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f7b80f6a0b0 CR3: 0000000329d66000 CR4: 0000000000350ee0 +Call Trace: + + netdev_core_pick_tx+0xa4/0xb0 + __dev_queue_xmit+0xf8/0x510 + ? __bpf_prog_exit+0x1e/0x30 + dev_queue_xmit+0x10/0x20 + ovs_vport_send+0xad/0x170 [openvswitch] + do_output+0x59/0x180 [openvswitch] + do_execute_actions+0xa80/0xaa0 [openvswitch] + ? kfree+0x1/0x250 + ? kfree+0x1/0x250 + ? kprobe_perf_func+0x4f/0x2b0 + ? flow_lookup.constprop.0+0x5c/0x110 [openvswitch] + ovs_execute_actions+0x4c/0x120 [openvswitch] + ovs_dp_process_packet+0xa1/0x200 [openvswitch] + ? ovs_ct_update_key.isra.0+0xa8/0x120 [openvswitch] + ? ovs_ct_fill_key+0x1d/0x30 [openvswitch] + ? ovs_flow_key_extract+0x2db/0x350 [openvswitch] + ovs_vport_receive+0x77/0xd0 [openvswitch] + ? __htab_map_lookup_elem+0x4e/0x60 + ? bpf_prog_680e8aff8547aec1_kfree+0x3b/0x714 + ? trace_call_bpf+0xc8/0x150 + ? kfree+0x1/0x250 + ? kfree+0x1/0x250 + ? kprobe_perf_func+0x4f/0x2b0 + ? kprobe_perf_func+0x4f/0x2b0 + ? __mod_memcg_lruvec_state+0x63/0xe0 + netdev_port_receive+0xc4/0x180 [openvswitch] + ? netdev_port_receive+0x180/0x180 [openvswitch] + netdev_frame_hook+0x1f/0x40 [openvswitch] + __netif_receive_skb_core.constprop.0+0x23d/0xf00 + __netif_receive_skb_one_core+0x3f/0xa0 + __netif_receive_skb+0x15/0x60 + process_backlog+0x9e/0x170 + __napi_poll+0x33/0x180 + net_rx_action+0x126/0x280 + ? ttwu_do_activate+0x72/0xf0 + __do_softirq+0xd9/0x2e7 + ? 
rcu_report_exp_cpu_mult+0x1b0/0x1b0 + do_softirq+0x7d/0xb0 + + + __local_bh_enable_ip+0x54/0x60 + ip_finish_output2+0x191/0x460 + __ip_finish_output+0xb7/0x180 + ip_finish_output+0x2e/0xc0 + ip_output+0x78/0x100 + ? __ip_finish_output+0x180/0x180 + ip_local_out+0x5e/0x70 + __ip_queue_xmit+0x184/0x440 + ? tcp_syn_options+0x1f9/0x300 + ip_queue_xmit+0x15/0x20 + __tcp_transmit_skb+0x910/0x9c0 + ? __mod_memcg_state+0x44/0xa0 + tcp_connect+0x437/0x4e0 + ? ktime_get_with_offset+0x60/0xf0 + tcp_v4_connect+0x436/0x530 + __inet_stream_connect+0xd4/0x3a0 + ? kprobe_perf_func+0x4f/0x2b0 + ? aa_sk_perm+0x43/0x1c0 + inet_stream_connect+0x3b/0x60 + __sys_connect_file+0x63/0x70 + __sys_connect+0xa6/0xd0 + ? setfl+0x108/0x170 + ? do_fcntl+0xe8/0x5a0 + __x64_sys_connect+0x18/0x20 + do_syscall_64+0x5c/0xc0 + ? __x64_sys_fcntl+0xa9/0xd0 + ? exit_to_user_mode_prepare+0x37/0xb0 + ? syscall_exit_to_user_mode+0x27/0x50 + ? do_syscall_64+0x69/0xc0 + ? __sys_setsockopt+0xea/0x1e0 + ? exit_to_user_mode_prepare+0x37/0xb0 + ? syscall_exit_to_user_mode+0x27/0x50 + ? __x64_sys_setsockopt+0x1f/0x30 + ? do_syscall_64+0x69/0xc0 + ? irqentry_exit+0x1d/0x30 + ? 
exc_page_fault+0x89/0x170 + entry_SYSCALL_64_after_hwframe+0x61/0xcb +RIP: 0033:0x7f7b8101c6a7 +Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89 +RSP: 002b:00007ffffd6b2198 EFLAGS: 00000246 ORIG_RAX: 000000000000002a +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b8101c6a7 +RDX: 0000000000000010 RSI: 00007ffffd6b2360 RDI: 0000000000000005 +RBP: 0000561f1370d560 R08: 00002795ad21d1ac R09: 0030312e302e302e +R10: 00007ffffd73f080 R11: 0000000000000246 R12: 0000561f1370c410 +R13: 0000000000000000 R14: 0000000000000005 R15: 0000000000000000 + + +Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") +Co-developed-by: Luca Czesla +Signed-off-by: Luca Czesla +Signed-off-by: Felix Huettner +Reviewed-by: Eric Dumazet +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/ZC0pBXBAgh7c76CA@kernel-bug-kernel-bug +Signed-off-by: Jakub Kicinski +Signed-off-by: Carlos Soto +Signed-off-by: Florian Fainelli +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 1 + + net/openvswitch/actions.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3186,6 +3186,7 @@ static u16 skb_tx_hash(const struct net_ + } + + if (skb_rx_queue_recorded(skb)) { ++ BUILD_BUG_ON_INVALID(qcount == 0); + hash = skb_get_rx_queue(skb); + if (hash >= qoffset) + hash -= qoffset; +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -912,7 +912,7 @@ static void do_output(struct datapath *d + { + struct vport *vport = ovs_vport_rcu(dp, out_port); + +- if (likely(vport)) { ++ if (likely(vport && netif_carrier_ok(vport->dev))) { + u16 mru = OVS_CB(skb)->mru; + u32 cutlen = OVS_CB(skb)->cutlen; + diff --git a/queue-5.10/openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch 
b/queue-5.10/openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch new file mode 100644 index 0000000000..fc47c64dce --- /dev/null +++ b/queue-5.10/openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch @@ -0,0 +1,77 @@ +From 47e55e4b410f7d552e43011baa5be1aab4093990 Mon Sep 17 00:00:00 2001 +From: Ilya Maximets +Date: Thu, 9 Jan 2025 13:21:24 +0100 +Subject: openvswitch: fix lockup on tx to unregistering netdev with carrier + +From: Ilya Maximets + +commit 47e55e4b410f7d552e43011baa5be1aab4093990 upstream. + +The commit in the Fixes tag attempted to fix the issue in the following +sequence of calls: + + do_output + -> ovs_vport_send + -> dev_queue_xmit + -> __dev_queue_xmit + -> netdev_core_pick_tx + -> skb_tx_hash + +When a device is unregistering, the 'dev->real_num_tx_queues' goes to +zero and the 'while (unlikely(hash >= qcount))' loop inside the +'skb_tx_hash' becomes infinite, locking up the core forever. + +But unfortunately, checking just the carrier status is not enough to +fix the issue, because some devices may still be in unregistering +state while reporting carrier status OK. + +One example of such a device is net/dummy. It sets carrier ON +on start, but it doesn't implement .ndo_stop to set the carrier off. +And it makes sense, because dummy doesn't really have a carrier. +Therefore, while this device is unregistering, it's still easy to hit +the infinite loop in the skb_tx_hash() from the OVS datapath. There +might be other drivers that do the same, but dummy by itself is +important for the OVS ecosystem, because it is frequently used as a +packet sink for tcpdump while debugging OVS deployments. And when the +issue is hit, the only way to recover is to reboot. + +Fix that by also checking if the device is running. The running +state is handled by the net core during unregistering, so it covers +the unregistering case better, and we don't really need to send packets +to devices that are not running anyway.
+ +While only checking the running state might be enough, the carrier +check is preserved. The running and the carrier states seem disjoint +throughout the code and different drivers. And other core functions +like __dev_direct_xmit() check both before attempting to transmit +a packet. So, it seems safer to check both flags in OVS as well. + +Fixes: 066b86787fa3 ("net: openvswitch: fix race on port output") +Reported-by: Friedrich Weber +Closes: https://mail.openvswitch.org/pipermail/ovs-discuss/2025-January/053423.html +Signed-off-by: Ilya Maximets +Tested-by: Friedrich Weber +Reviewed-by: Aaron Conole +Link: https://patch.msgid.link/20250109122225.4034688-1-i.maximets@ovn.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Carlos Soto +Signed-off-by: Florian Fainelli +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/actions.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -912,7 +912,9 @@ static void do_output(struct datapath *d + { + struct vport *vport = ovs_vport_rcu(dp, out_port); + +- if (likely(vport && netif_carrier_ok(vport->dev))) { ++ if (likely(vport && ++ netif_running(vport->dev) && ++ netif_carrier_ok(vport->dev))) { + u16 mru = OVS_CB(skb)->mru; + u32 cutlen = OVS_CB(skb)->cutlen; + diff --git a/queue-5.10/series b/queue-5.10/series index 31356db87d..78b11ed419 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -165,3 +165,13 @@ smb-client-fix-uaf-in-async-decryption.patch smb-client-fix-null-ptr-deref-in-crypto_aead_setkey.patch bpf-avoid-holding-freeze_mutex-during-mmap-operation.patch bpf-check-rcu_read_lock_trace_held-before-calling-bpf-map-helpers.patch +blk-cgroup-support-to-track-if-policy-is-online.patch +blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch +ext4-fix-timer-use-after-free-on-failed-mount.patch +mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch
+mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch +mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch +net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch +ipvs-properly-dereference-pe-in-ip_vs_add_service.patch +net-openvswitch-fix-race-on-port-output.patch +openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch