--- /dev/null
+From dfd6200a095440b663099d8d42f1efb0175a1ce3 Mon Sep 17 00:00:00 2001
+From: Yu Kuai <yukuai3@huawei.com>
+Date: Thu, 19 Jan 2023 19:03:49 +0800
+Subject: blk-cgroup: support to track if policy is online
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+commit dfd6200a095440b663099d8d42f1efb0175a1ce3 upstream.
+
+A new field 'online' is added to blkg_policy_data to fix the following
+2 problems:
+
+1) In blkcg_activate_policy(), if pd_alloc_fn() with 'GFP_NOWAIT'
+ failed, 'queue_lock' will be dropped and pd_alloc_fn() will try again
+ without 'GFP_NOWAIT'. In the meantime, remove cgroup can race with
+ it, and pd_offline_fn() will be called without pd_init_fn() and
+ pd_online_fn(). This way null-ptr-dereference can be triggered.
+
+2) In order to synchronize pd_free_fn() from blkg_free_workfn() and
+ blkcg_deactivate_policy(), 'list_del_init(&blkg->q_node)' will be
+ delayed to blkg_free_workfn(), hence pd_offline_fn() can be called
+ first in blkg_destroy(), and then blkcg_deactivate_policy() will
+ call it again, we must prevent it.
+
+The new field 'online' will be set after pd_online_fn() and will be
+cleared after pd_offline_fn(), in the meantime pd_offline_fn() will only
+be called if 'online' is set.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20230119110350.2287325-3-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-cgroup.c | 24 +++++++++++++++++-------
+ include/linux/blk-cgroup.h | 1 +
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -191,6 +191,7 @@ static struct blkcg_gq *blkg_alloc(struc
+ blkg->pd[i] = pd;
+ pd->blkg = blkg;
+ pd->plid = i;
++ pd->online = false;
+ }
+
+ return blkg;
+@@ -288,8 +289,11 @@ static struct blkcg_gq *blkg_create(stru
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+- if (blkg->pd[i] && pol->pd_online_fn)
+- pol->pd_online_fn(blkg->pd[i]);
++ if (blkg->pd[i]) {
++ if (pol->pd_online_fn)
++ pol->pd_online_fn(blkg->pd[i]);
++ blkg->pd[i]->online = true;
++ }
+ }
+ }
+ blkg->online = true;
+@@ -389,8 +393,11 @@ static void blkg_destroy(struct blkcg_gq
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+- if (blkg->pd[i] && pol->pd_offline_fn)
+- pol->pd_offline_fn(blkg->pd[i]);
++ if (blkg->pd[i] && blkg->pd[i]->online) {
++ if (pol->pd_offline_fn)
++ pol->pd_offline_fn(blkg->pd[i]);
++ blkg->pd[i]->online = false;
++ }
+ }
+
+ blkg->online = false;
+@@ -1364,6 +1371,7 @@ retry:
+ blkg->pd[pol->plid] = pd;
+ pd->blkg = blkg;
+ pd->plid = pol->plid;
++ pd->online = false;
+ }
+
+ /* all allocated, init in the same order */
+@@ -1371,9 +1379,11 @@ retry:
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_init_fn(blkg->pd[pol->plid]);
+
+- if (pol->pd_online_fn)
+- list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
++ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
++ if (pol->pd_online_fn)
+ pol->pd_online_fn(blkg->pd[pol->plid]);
++ blkg->pd[pol->plid]->online = true;
++ }
+
+ __set_bit(pol->plid, q->blkcg_pols);
+ ret = 0;
+@@ -1435,7 +1445,7 @@ void blkcg_deactivate_policy(struct requ
+
+ spin_lock(&blkcg->lock);
+ if (blkg->pd[pol->plid]) {
+- if (pol->pd_offline_fn)
++ if (blkg->pd[pol->plid]->online && pol->pd_offline_fn)
+ pol->pd_offline_fn(blkg->pd[pol->plid]);
+ pol->pd_free_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+--- a/include/linux/blk-cgroup.h
++++ b/include/linux/blk-cgroup.h
+@@ -87,6 +87,7 @@ struct blkg_policy_data {
+ /* the blkg and policy id this per-policy data belongs to */
+ struct blkcg_gq *blkg;
+ int plid;
++ bool online;
+ };
+
+ /*
--- /dev/null
+From 01bc4fda9ea0a6b52f12326486f07a4910666cf6 Mon Sep 17 00:00:00 2001
+From: Li Nan <linan122@huawei.com>
+Date: Fri, 19 Apr 2024 17:32:57 +0800
+Subject: blk-iocost: do not WARN if iocg was already offlined
+
+From: Li Nan <linan122@huawei.com>
+
+commit 01bc4fda9ea0a6b52f12326486f07a4910666cf6 upstream.
+
+In iocg_pay_debt(), warn is triggered if 'active_list' is empty, which
+is intended to confirm iocg is active when it has debt. However, warn
+can be triggered during a blkcg or disk removal, if iocg_waitq_timer_fn()
+is run at that time:
+
+ WARNING: CPU: 0 PID: 2344971 at block/blk-iocost.c:1402 iocg_pay_debt+0x14c/0x190
+ Call trace:
+ iocg_pay_debt+0x14c/0x190
+ iocg_kick_waitq+0x438/0x4c0
+ iocg_waitq_timer_fn+0xd8/0x130
+ __run_hrtimer+0x144/0x45c
+ __hrtimer_run_queues+0x16c/0x244
+ hrtimer_interrupt+0x2cc/0x7b0
+
+The warn in this situation is meaningless. Since this iocg is being
+removed, the state of the 'active_list' is irrelevant, and 'waitq_timer'
+is canceled after removing 'active_list' in ioc_pd_free(), which ensures
+iocg is freed after iocg_waitq_timer_fn() returns.
+
+Therefore, add the check if iocg was already offlined to avoid warn
+when removing a blkcg or disk.
+
+Signed-off-by: Li Nan <linan122@huawei.com>
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20240419093257.3004211-1-linan666@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-iocost.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -1389,8 +1389,11 @@ static void iocg_pay_debt(struct ioc_gq
+ lockdep_assert_held(&iocg->ioc->lock);
+ lockdep_assert_held(&iocg->waitq.lock);
+
+- /* make sure that nobody messed with @iocg */
+- WARN_ON_ONCE(list_empty(&iocg->active_list));
++ /*
++ * make sure that nobody messed with @iocg. Check iocg->pd.online
++ * to avoid warn when removing blkcg or disk.
++ */
++ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online);
+ WARN_ON_ONCE(iocg->inuse > 1);
+
+ iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);
--- /dev/null
+From 0ce160c5bdb67081a62293028dc85758a8efb22a Mon Sep 17 00:00:00 2001
+From: Xiaxi Shen <shenxiaxi26@gmail.com>
+Date: Sun, 14 Jul 2024 21:33:36 -0700
+Subject: ext4: fix timer use-after-free on failed mount
+
+From: Xiaxi Shen <shenxiaxi26@gmail.com>
+
+commit 0ce160c5bdb67081a62293028dc85758a8efb22a upstream.
+
+Syzbot has found an ODEBUG bug in ext4_fill_super
+
+The del_timer_sync function cancels the s_err_report timer,
+which reminds about filesystem errors daily. We should
+guarantee the timer is no longer active before kfree(sbi).
+
+When filesystem mounting fails, the flow goes to failed_mount3,
+where an error occurs when ext4_stop_mmpd is called, causing
+a read I/O failure. This triggers the ext4_handle_error function
+that ultimately re-arms the timer,
+leaving the s_err_report timer active before kfree(sbi) is called.
+
+Fix the issue by canceling the s_err_report timer after calling ext4_stop_mmpd.
+
+Signed-off-by: Xiaxi Shen <shenxiaxi26@gmail.com>
+Reported-and-tested-by: syzbot+59e0101c430934bc9a36@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=59e0101c430934bc9a36
+Link: https://patch.msgid.link/20240715043336.98097-1-shenxiaxi26@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+[Minor context change fixed]
+Signed-off-by: Xiangyu Chen <xiangyu.chen@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5185,8 +5185,8 @@ failed_mount_wq:
+ failed_mount3a:
+ ext4_es_unregister_shrinker(sbi);
+ failed_mount3:
+- del_timer_sync(&sbi->s_err_report);
+ ext4_stop_mmpd(sbi);
++ del_timer_sync(&sbi->s_err_report);
+ failed_mount2:
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
--- /dev/null
+From cbd070a4ae62f119058973f6d2c984e325bce6e7 Mon Sep 17 00:00:00 2001
+From: Chen Hanxiao <chenhx.fnst@fujitsu.com>
+Date: Thu, 27 Jun 2024 14:15:15 +0800
+Subject: ipvs: properly dereference pe in ip_vs_add_service
+
+From: Chen Hanxiao <chenhx.fnst@fujitsu.com>
+
+commit cbd070a4ae62f119058973f6d2c984e325bce6e7 upstream.
+
+Use pe directly to resolve sparse warning:
+
+ net/netfilter/ipvs/ip_vs_ctl.c:1471:27: warning: dereference of noderef expression
+
+Fixes: 39b972231536 ("ipvs: handle connections started by real-servers")
+Signed-off-by: Chen Hanxiao <chenhx.fnst@fujitsu.com>
+Acked-by: Julian Anastasov <ja@ssi.bg>
+Acked-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Cliff Liu <donghua.liu@windriver.com>
+Signed-off-by: He Zhe <Zhe.He@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/ipvs/ip_vs_ctl.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/netfilter/ipvs/ip_vs_ctl.c
++++ b/net/netfilter/ipvs/ip_vs_ctl.c
+@@ -1384,20 +1384,20 @@ ip_vs_add_service(struct netns_ipvs *ipv
+ sched = NULL;
+ }
+
+- /* Bind the ct retriever */
+- RCU_INIT_POINTER(svc->pe, pe);
+- pe = NULL;
+-
+ /* Update the virtual service counters */
+ if (svc->port == FTPPORT)
+ atomic_inc(&ipvs->ftpsvc_counter);
+ else if (svc->port == 0)
+ atomic_inc(&ipvs->nullsvc_counter);
+- if (svc->pe && svc->pe->conn_out)
++ if (pe && pe->conn_out)
+ atomic_inc(&ipvs->conn_out_counter);
+
+ ip_vs_start_estimator(ipvs, &svc->stats);
+
++ /* Bind the ct retriever */
++ RCU_INIT_POINTER(svc->pe, pe);
++ pe = NULL;
++
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ipvs->num_services++;
--- /dev/null
+From 97904a59855c7ac7c613085bc6bdc550d48524ff Mon Sep 17 00:00:00 2001
+From: Kamal Dasu <kdasu.kdev@gmail.com>
+Date: Fri, 20 May 2022 14:31:08 -0400
+Subject: mmc: sdhci-brcmstb: Add ability to increase max clock rate for 72116b0
+
+From: Kamal Dasu <kdasu.kdev@gmail.com>
+
+commit 97904a59855c7ac7c613085bc6bdc550d48524ff upstream.
+
+The 72116B0 has improved SDIO controllers that allow the max clock
+rate to be increased from a max of 100MHz to a max of 150MHz. The
+driver will need to get the clock and increase its default rate
+and override the caps register, that still indicates a max of 100MHz.
+The new clock will be named "sdio_freq" in the DT node's "clock-names"
+list. The driver will use a DT property, "clock-frequency", to
+enable this functionality and will get the actual rate in MHz
+from the property to allow various speeds to be requested.
+
+Signed-off-by: Al Cooper <alcooperx@gmail.com>
+Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/20220520183108.47358-3-kdasu.kdev@gmail.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-brcmstb.c | 69 ++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 68 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-brcmstb.c
++++ b/drivers/mmc/host/sdhci-brcmstb.c
+@@ -32,6 +32,8 @@
+ struct sdhci_brcmstb_priv {
+ void __iomem *cfg_regs;
+ unsigned int flags;
++ struct clk *base_clk;
++ u32 base_freq_hz;
+ };
+
+ struct brcmstb_match_priv {
+@@ -251,9 +253,11 @@ static int sdhci_brcmstb_probe(struct pl
+ struct sdhci_pltfm_host *pltfm_host;
+ const struct of_device_id *match;
+ struct sdhci_brcmstb_priv *priv;
++ u32 actual_clock_mhz;
+ struct sdhci_host *host;
+ struct resource *iomem;
+ struct clk *clk;
++ struct clk *base_clk;
+ int res;
+
+ match = of_match_node(sdhci_brcm_of_match, pdev->dev.of_node);
+@@ -331,6 +335,35 @@ static int sdhci_brcmstb_probe(struct pl
+ if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT)
+ host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+
++ /* Change the base clock frequency if the DT property exists */
++ if (device_property_read_u32(&pdev->dev, "clock-frequency",
++ &priv->base_freq_hz) != 0)
++ goto add_host;
++
++ base_clk = devm_clk_get_optional(&pdev->dev, "sdio_freq");
++ if (IS_ERR(base_clk)) {
++ dev_warn(&pdev->dev, "Clock for \"sdio_freq\" not found\n");
++ goto add_host;
++ }
++
++ res = clk_prepare_enable(base_clk);
++ if (res)
++ goto err;
++
++ /* set improved clock rate */
++ clk_set_rate(base_clk, priv->base_freq_hz);
++ actual_clock_mhz = clk_get_rate(base_clk) / 1000000;
++
++ host->caps &= ~SDHCI_CLOCK_V3_BASE_MASK;
++ host->caps |= (actual_clock_mhz << SDHCI_CLOCK_BASE_SHIFT);
++ /* Disable presets because they are now incorrect */
++ host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
++
++ dev_dbg(&pdev->dev, "Base Clock Frequency changed to %dMHz\n",
++ actual_clock_mhz);
++ priv->base_clk = base_clk;
++
++add_host:
+ res = sdhci_brcmstb_add_host(host, priv);
+ if (res)
+ goto err;
+@@ -341,6 +374,7 @@ static int sdhci_brcmstb_probe(struct pl
+ err:
+ sdhci_pltfm_free(pdev);
+ err_clk:
++ clk_disable_unprepare(base_clk);
+ clk_disable_unprepare(clk);
+ return res;
+ }
+@@ -352,11 +386,44 @@ static void sdhci_brcmstb_shutdown(struc
+
+ MODULE_DEVICE_TABLE(of, sdhci_brcm_of_match);
+
++#ifdef CONFIG_PM_SLEEP
++static int sdhci_brcmstb_suspend(struct device *dev)
++{
++ struct sdhci_host *host = dev_get_drvdata(dev);
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host);
++
++ clk_disable_unprepare(priv->base_clk);
++ return sdhci_pltfm_suspend(dev);
++}
++
++static int sdhci_brcmstb_resume(struct device *dev)
++{
++ struct sdhci_host *host = dev_get_drvdata(dev);
++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host);
++ int ret;
++
++ ret = sdhci_pltfm_resume(dev);
++ if (!ret && priv->base_freq_hz) {
++ ret = clk_prepare_enable(priv->base_clk);
++ if (!ret)
++ ret = clk_set_rate(priv->base_clk, priv->base_freq_hz);
++ }
++
++ return ret;
++}
++#endif
++
++static const struct dev_pm_ops sdhci_brcmstb_pmops = {
++ SET_SYSTEM_SLEEP_PM_OPS(sdhci_brcmstb_suspend, sdhci_brcmstb_resume)
++};
++
+ static struct platform_driver sdhci_brcmstb_driver = {
+ .driver = {
+ .name = "sdhci-brcmstb",
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+- .pm = &sdhci_pltfm_pmops,
++ .pm = &sdhci_brcmstb_pmops,
+ .of_match_table = of_match_ptr(sdhci_brcm_of_match),
+ },
+ .probe = sdhci_brcmstb_probe,
--- /dev/null
+From c3c0ed75ffbff5c70667030b5139bbb75b0a30f5 Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Wed, 8 Jun 2022 08:27:57 -0700
+Subject: mmc: sdhci-brcmstb: Initialize base_clk to NULL in sdhci_brcmstb_probe()
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit c3c0ed75ffbff5c70667030b5139bbb75b0a30f5 upstream.
+
+Clang warns a few times along the lines of:
+
+ drivers/mmc/host/sdhci-brcmstb.c:302:6: warning: variable 'base_clk' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
+ if (res)
+ ^~~
+ drivers/mmc/host/sdhci-brcmstb.c:376:24: note: uninitialized use occurs here
+ clk_disable_unprepare(base_clk);
+ ^~~~~~~~
+
+base_clk is used in the error path before it is initialized. Initialize
+it to NULL, as clk_disable_unprepare() calls clk_disable() and
+clk_unprepare(), which both handle NULL pointers gracefully.
+
+Link: https://github.com/ClangBuiltLinux/linux/issues/1650
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Link: https://lore.kernel.org/r/20220608152757.82529-1-nathan@kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-brcmstb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-brcmstb.c
++++ b/drivers/mmc/host/sdhci-brcmstb.c
+@@ -257,7 +257,7 @@ static int sdhci_brcmstb_probe(struct pl
+ struct sdhci_host *host;
+ struct resource *iomem;
+ struct clk *clk;
+- struct clk *base_clk;
++ struct clk *base_clk = NULL;
+ int res;
+
+ match = of_match_node(sdhci_brcm_of_match, pdev->dev.of_node);
--- /dev/null
+From 886201c70a1cab34ef96f867c2b2dd6379ffa7b9 Mon Sep 17 00:00:00 2001
+From: Kamal Dasu <kdasu.kdev@gmail.com>
+Date: Thu, 14 Jul 2022 13:41:32 -0400
+Subject: mmc: sdhci-brcmstb: use clk_get_rate(base_clk) in PM resume
+
+From: Kamal Dasu <kdasu.kdev@gmail.com>
+
+commit 886201c70a1cab34ef96f867c2b2dd6379ffa7b9 upstream.
+
+Use clk_get_rate for base_clk on resume before setting new rate.
+This change ensures that the clock api returns current rate
+and sets the clock to the desired rate and honors CLK_GET_RATE_NOCACHE
+attribute used by clock api.
+
+Fixes: 97904a59855c (mmc: sdhci-brcmstb: Add ability to increase max clock rate for 72116b0)
+Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/20220714174132.18541-1-kdasu.kdev@gmail.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Kamal Dasu <kamal.dasu@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-brcmstb.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/sdhci-brcmstb.c
++++ b/drivers/mmc/host/sdhci-brcmstb.c
+@@ -407,7 +407,14 @@ static int sdhci_brcmstb_resume(struct d
+ ret = sdhci_pltfm_resume(dev);
+ if (!ret && priv->base_freq_hz) {
+ ret = clk_prepare_enable(priv->base_clk);
+- if (!ret)
++ /*
++ * Note: using clk_get_rate() below as clk_get_rate()
++ * honors CLK_GET_RATE_NOCACHE attribute, but clk_set_rate()
++ * may do implicit get_rate() calls that do not honor
++ * CLK_GET_RATE_NOCACHE.
++ */
++ if (!ret &&
++ (clk_get_rate(priv->base_clk) != priv->base_freq_hz))
+ ret = clk_set_rate(priv->base_clk, priv->base_freq_hz);
+ }
+
--- /dev/null
+From fb1a3132ee1ac968316e45d21a48703a6db0b6c3 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Mon, 31 May 2021 16:28:39 +0300
+Subject: net/mlx5e: Fix use-after-free of encap entry in neigh update handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit fb1a3132ee1ac968316e45d21a48703a6db0b6c3 upstream.
+
+Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock
+removal from TC filter update path and properly handle concurrent encap
+entry insertion/deletion which can lead to following use-after-free:
+
+ [23827.464923] ==================================================================
+ [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635
+ [23827.472251]
+ [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5
+ [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
+ [23827.476731] Call Trace:
+ [23827.477260] dump_stack+0xbb/0x107
+ [23827.477906] print_address_description.constprop.0+0x18/0x140
+ [23827.478896] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.479879] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.480905] kasan_report.cold+0x7c/0xd8
+ [23827.481701] ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.482744] kasan_check_range+0x145/0x1a0
+ [23827.493112] mlx5e_encap_take+0x72/0x140 [mlx5_core]
+ [23827.494054] ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core]
+ [23827.495296] mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core]
+ [23827.496338] ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core]
+ [23827.497486] ? read_word_at_a_time+0xe/0x20
+ [23827.498250] ? strscpy+0xa0/0x2a0
+ [23827.498889] process_one_work+0x8ac/0x14e0
+ [23827.499638] ? lockdep_hardirqs_on_prepare+0x400/0x400
+ [23827.500537] ? pwq_dec_nr_in_flight+0x2c0/0x2c0
+ [23827.501359] ? rwlock_bug.part.0+0x90/0x90
+ [23827.502116] worker_thread+0x53b/0x1220
+ [23827.502831] ? process_one_work+0x14e0/0x14e0
+ [23827.503627] kthread+0x328/0x3f0
+ [23827.504254] ? _raw_spin_unlock_irq+0x24/0x40
+ [23827.505065] ? __kthread_bind_mask+0x90/0x90
+ [23827.505912] ret_from_fork+0x1f/0x30
+ [23827.506621]
+ [23827.506987] Allocated by task 28248:
+ [23827.507694] kasan_save_stack+0x1b/0x40
+ [23827.508476] __kasan_kmalloc+0x7c/0x90
+ [23827.509197] mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core]
+ [23827.510194] mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core]
+ [23827.511218] __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core]
+ [23827.512234] mlx5e_configure_flower+0x191c/0x4870 [mlx5_core]
+ [23827.513298] tc_setup_cb_add+0x1d5/0x420
+ [23827.514023] fl_hw_replace_filter+0x382/0x6a0 [cls_flower]
+ [23827.514975] fl_change+0x2ceb/0x4a51 [cls_flower]
+ [23827.515821] tc_new_tfilter+0x89a/0x2070
+ [23827.516548] rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.517300] netlink_rcv_skb+0x11d/0x340
+ [23827.518021] netlink_unicast+0x42b/0x700
+ [23827.518742] netlink_sendmsg+0x743/0xc20
+ [23827.519467] sock_sendmsg+0xb2/0xe0
+ [23827.520131] ____sys_sendmsg+0x590/0x770
+ [23827.520851] ___sys_sendmsg+0xd8/0x160
+ [23827.521552] __sys_sendmsg+0xb7/0x140
+ [23827.522238] do_syscall_64+0x3a/0x70
+ [23827.522907] entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.523797]
+ [23827.524163] Freed by task 25948:
+ [23827.524780] kasan_save_stack+0x1b/0x40
+ [23827.525488] kasan_set_track+0x1c/0x30
+ [23827.526187] kasan_set_free_info+0x20/0x30
+ [23827.526968] __kasan_slab_free+0xed/0x130
+ [23827.527709] slab_free_freelist_hook+0xcf/0x1d0
+ [23827.528528] kmem_cache_free_bulk+0x33a/0x6e0
+ [23827.529317] kfree_rcu_work+0x55f/0xb70
+ [23827.530024] process_one_work+0x8ac/0x14e0
+ [23827.530770] worker_thread+0x53b/0x1220
+ [23827.531480] kthread+0x328/0x3f0
+ [23827.532114] ret_from_fork+0x1f/0x30
+ [23827.532785]
+ [23827.533147] Last potentially related work creation:
+ [23827.534007] kasan_save_stack+0x1b/0x40
+ [23827.534710] kasan_record_aux_stack+0xab/0xc0
+ [23827.535492] kvfree_call_rcu+0x31/0x7b0
+ [23827.536206] mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core]
+ [23827.537305] mlx5e_flow_put+0x49/0x80 [mlx5_core]
+ [23827.538290] mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core]
+ [23827.539300] tc_setup_cb_destroy+0x18e/0x2f0
+ [23827.540144] fl_hw_destroy_filter+0x1d2/0x310 [cls_flower]
+ [23827.541148] __fl_delete+0x4dc/0x660 [cls_flower]
+ [23827.541985] fl_delete+0x97/0x160 [cls_flower]
+ [23827.542782] tc_del_tfilter+0x7ab/0x13d0
+ [23827.543503] rtnetlink_rcv_msg+0x644/0x8c0
+ [23827.544257] netlink_rcv_skb+0x11d/0x340
+ [23827.544981] netlink_unicast+0x42b/0x700
+ [23827.545700] netlink_sendmsg+0x743/0xc20
+ [23827.546424] sock_sendmsg+0xb2/0xe0
+ [23827.547084] ____sys_sendmsg+0x590/0x770
+ [23827.547850] ___sys_sendmsg+0xd8/0x160
+ [23827.548606] __sys_sendmsg+0xb7/0x140
+ [23827.549303] do_syscall_64+0x3a/0x70
+ [23827.549969] entry_SYSCALL_64_after_hwframe+0x44/0xae
+ [23827.550853]
+ [23827.551217] The buggy address belongs to the object at ffff8881d1322200
+ [23827.551217] which belongs to the cache kmalloc-256 of size 256
+ [23827.553341] The buggy address is located 140 bytes inside of
+ [23827.553341] 256-byte region [ffff8881d1322200, ffff8881d1322300)
+ [23827.555747] The buggy address belongs to the page:
+ [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320
+ [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0
+ [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
+ [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40
+ [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
+ [23827.564112] page dumped because: kasan: bad access detected
+ [23827.565439]
+ [23827.565932] Memory state around the buggy address:
+ [23827.566917] ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.568485] ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ [23827.571143] ^
+ [23827.571879] ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.573283] ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ [23827.574654] ==================================================================
+
+Most of the necessary logic is already correctly implemented by
+mlx5e_get_next_valid_encap() helper that is used in neigh stats update
+handler. Make the handler generic by renaming it to
+mlx5e_get_next_matching_encap() and use callback to test whether flow is
+matching instead of hardcoded check for 'valid' flag value. Implement
+mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap()
+with callback that tests encap MLX5_ENCAP_ENTRY_VALID flag. Implement new
+mlx5e_get_next_init_encap() helper by calling
+mlx5e_get_next_matching_encap() with callback that tests encap completion
+result to be non-error and use it in mlx5e_rep_neigh_update() to safely
+iterate over nhe->encap_list.
+
+Remove encap completion logic from mlx5e_rep_update_flows() since the encap
+entries passed to this function are already guaranteed to be properly
+initialized by similar code in mlx5e_get_next_init_encap().
+
+Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+[ since kernel 5.10 doesn't have commit 0d9f96471493
+ ("net/mlx5e: Extract tc tunnel encap/decap code to dedicated file")
+ which moved encap/decap from en_tc.c to tc_tun_encap.c, so backport and
+ move the additional functions to en_tc.c instead of tc_tun_encap.c ]
+Signed-off-by: Xiangyu Chen <xiangyu.chen@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 17 +++-----
+ drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 6 ---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++--
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 3 +
+ 4 files changed, 41 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+@@ -129,9 +129,8 @@ static void mlx5e_rep_neigh_update(struc
+ work);
+ struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+ struct neighbour *n = update_work->n;
+- struct mlx5e_encap_entry *e;
++ struct mlx5e_encap_entry *e = NULL;
+ unsigned char ha[ETH_ALEN];
+- struct mlx5e_priv *priv;
+ bool neigh_connected;
+ u8 nud_state, dead;
+
+@@ -152,14 +151,12 @@ static void mlx5e_rep_neigh_update(struc
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+- list_for_each_entry(e, &nhe->encap_list, encap_list) {
+- if (!mlx5e_encap_take(e))
+- continue;
+-
+- priv = netdev_priv(e->out_dev);
+- mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+- mlx5e_encap_put(priv, e);
+- }
++ /* mlx5e_get_next_init_encap() releases previous encap before returning
++ * the next one.
++ */
++ while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
++ mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
++
+ rtnl_unlock();
+ mlx5e_release_neigh_update_work(update_work);
+ }
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+@@ -91,13 +91,9 @@ void mlx5e_rep_update_flows(struct mlx5e
+
+ ASSERT_RTNL();
+
+- /* wait for encap to be fully initialized */
+- wait_for_completion(&e->res_ready);
+-
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+- if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+- ether_addr_equal(e->h_dest, ha)))
++ if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1653,9 +1653,12 @@ void mlx5e_put_encap_flow_list(struct ml
+ mlx5e_flow_put(priv, flow);
+ }
+
++typedef bool (match_cb)(struct mlx5e_encap_entry *);
++
+ static struct mlx5e_encap_entry *
+-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+- struct mlx5e_encap_entry *e)
++mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e,
++ match_cb match)
+ {
+ struct mlx5e_encap_entry *next = NULL;
+
+@@ -1690,7 +1693,7 @@ retry:
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
++ if (!match(next)) {
+ e = next;
+ goto retry;
+ }
+@@ -1698,6 +1701,30 @@ retry:
+ return next;
+ }
+
++static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
++{
++ return e->flags & MLX5_ENCAP_ENTRY_VALID;
++}
++
++static struct mlx5e_encap_entry *
++mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e)
++{
++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
++}
++
++static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
++{
++ return e->compl_result >= 0;
++}
++
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e)
++{
++ return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
++}
++
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+ {
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+@@ -161,6 +161,9 @@ void mlx5e_take_all_encap_flows(struct m
+ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
+
+ struct mlx5e_neigh_hash_entry;
++struct mlx5e_encap_entry *
++mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
++ struct mlx5e_encap_entry *e);
+ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+
+ void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
--- /dev/null
+From 066b86787fa3d97b7aefb5ac0a99a22dad2d15f8 Mon Sep 17 00:00:00 2001
+From: Felix Huettner <felix.huettner@mail.schwarz>
+Date: Wed, 5 Apr 2023 07:53:41 +0000
+Subject: net: openvswitch: fix race on port output
+
+From: Felix Huettner <felix.huettner@mail.schwarz>
+
+commit 066b86787fa3d97b7aefb5ac0a99a22dad2d15f8 upstream.
+
+assume the following setup on a single machine:
+1. An openvswitch instance with one bridge and default flows
+2. two network namespaces "server" and "client"
+3. two ovs interfaces "server" and "client" on the bridge
+4. for each ovs interface a veth pair with a matching name and 32 rx and
+ tx queues
+5. move the ends of the veth pairs to the respective network namespaces
+6. assign ip addresses to each of the veth ends in the namespaces (needs
+ to be the same subnet)
+7. start some http server on the server network namespace
+8. test if a client in the client namespace can reach the http server
+
+when following the actions below the host has a chance of getting a cpu
+stuck in an infinite loop:
+1. send a large amount of parallel requests to the http server (around
+ 3000 curls should work)
+2. in parallel delete the network namespace (do not delete interfaces or
+ stop the server, just kill the namespace)
+
+there is a low chance that this will cause the below kernel cpu stuck
+message. If this does not happen just retry.
+Below there is also the output of bpftrace for the functions mentioned
+in the output.
+
+The series of events happening here is:
+1. the network namespace is deleted calling
+ `unregister_netdevice_many_notify` somewhere in the process
+2. this sets first `NETREG_UNREGISTERING` on both ends of the veth and
+ then runs `synchronize_net`
+3. it then calls `call_netdevice_notifiers` with `NETDEV_UNREGISTER`
+4. this is then handled by `dp_device_event` which calls
+ `ovs_netdev_detach_dev` (if a vport is found, which is the case for
+ the veth interface attached to ovs)
+5. this removes the rx_handlers of the device but does not prevent
+ packets from being sent to the device
+6. `dp_device_event` then queues the vport deletion to work in
+ background as an ovs_lock is needed that we do not hold in the
+ unregistration path
+7. `unregister_netdevice_many_notify` continues to call
+ `netdev_unregister_kobject` which sets `real_num_tx_queues` to 0
+8. port deletion continues (but details are not relevant for this issue)
+9. at some future point the background task deletes the vport
+
+If after 7. but before 9. a packet is sent to the ovs vport (which is
+not deleted at this point in time), the vport forwards it to the
+`dev_queue_xmit` flow even though the device is unregistering.
+In `skb_tx_hash` (which is called in the `dev_queue_xmit` path) there is
+a while loop (if the packet has an rx_queue recorded) that is infinite if
+`dev->real_num_tx_queues` is zero.
+
+To prevent this from happening we update `do_output` to handle devices
+without carrier the same as if the device is not found (which would
+be the code path after 9. is done).
+
+Additionally we now produce a warning in `skb_tx_hash` if we will hit
+the infinite loop.
+
+bpftrace (first word is function name):
+
+__dev_queue_xmit server: real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1
+netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 1, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 1
+dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 2, reg_state: 1
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 6, reg_state: 2
+ovs_netdev_detach_dev server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, reg_state: 2
+netdev_rx_handler_unregister server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+netdev_rx_handler_unregister ret server: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024, reg_state: 2
+dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 27, reg_state: 2
+dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 22, reg_state: 2
+dp_device_event server: real_num_tx_queues: 1 cpu 9, pid: 21024, tid: 21024, event 18, reg_state: 2
+netdev_unregister_kobject: real_num_tx_queues: 1, cpu: 9, pid: 21024, tid: 21024
+synchronize_rcu_expedited: cpu 9, pid: 21024, tid: 21024
+ovs_vport_send server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2
+__dev_queue_xmit server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2
+netdev_core_pick_tx server: addr: 0xffff9f0a46d4a000 real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024, skb_addr: 0xffff9edb6f207000, reg_state: 2
+broken device server: real_num_tx_queues: 0, cpu: 2, pid: 28024, tid: 28024
+ovs_dp_detach_port server: real_num_tx_queues: 0 cpu 9, pid: 9124, tid: 9124, reg_state: 2
+synchronize_rcu_expedited: cpu 9, pid: 33604, tid: 33604
+
+stuck message:
+
+watchdog: BUG: soft lockup - CPU#5 stuck for 26s! [curl:1929279]
+Modules linked in: veth pktgen bridge stp llc ip_set_hash_net nft_counter xt_set nft_compat nf_tables ip_set_hash_ip ip_set nfnetlink_cttimeout nfnetlink openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 tls binfmt_misc nls_iso8859_1 input_leds joydev serio_raw dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua sch_fq_codel drm efi_pstore virtio_rng ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear hid_generic usbhid hid crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net ahci net_failover crypto_simd cryptd psmouse libahci virtio_blk failover
+CPU: 5 PID: 1929279 Comm: curl Not tainted 5.15.0-67-generic #74-Ubuntu
+Hardware name: OpenStack Foundation OpenStack Nova, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+RIP: 0010:netdev_pick_tx+0xf1/0x320
+Code: 00 00 8d 48 ff 0f b7 c1 66 39 ca 0f 86 e9 01 00 00 45 0f b7 ff 41 39 c7 0f 87 5b 01 00 00 44 29 f8 41 39 c7 0f 87 4f 01 00 00 <eb> f2 0f 1f 44 00 00 49 8b 94 24 28 04 00 00 48 85 d2 0f 84 53 01
+RSP: 0018:ffffb78b40298820 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffff9c8773adc2e0 RCX: 000000000000083f
+RDX: 0000000000000000 RSI: ffff9c8773adc2e0 RDI: ffff9c870a25e000
+RBP: ffffb78b40298858 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff9c870a25e000
+R13: ffff9c870a25e000 R14: ffff9c87fe043480 R15: 0000000000000000
+FS: 00007f7b80008f00(0000) GS:ffff9c8e5f740000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f7b80f6a0b0 CR3: 0000000329d66000 CR4: 0000000000350ee0
+Call Trace:
+ <IRQ>
+ netdev_core_pick_tx+0xa4/0xb0
+ __dev_queue_xmit+0xf8/0x510
+ ? __bpf_prog_exit+0x1e/0x30
+ dev_queue_xmit+0x10/0x20
+ ovs_vport_send+0xad/0x170 [openvswitch]
+ do_output+0x59/0x180 [openvswitch]
+ do_execute_actions+0xa80/0xaa0 [openvswitch]
+ ? kfree+0x1/0x250
+ ? kfree+0x1/0x250
+ ? kprobe_perf_func+0x4f/0x2b0
+ ? flow_lookup.constprop.0+0x5c/0x110 [openvswitch]
+ ovs_execute_actions+0x4c/0x120 [openvswitch]
+ ovs_dp_process_packet+0xa1/0x200 [openvswitch]
+ ? ovs_ct_update_key.isra.0+0xa8/0x120 [openvswitch]
+ ? ovs_ct_fill_key+0x1d/0x30 [openvswitch]
+ ? ovs_flow_key_extract+0x2db/0x350 [openvswitch]
+ ovs_vport_receive+0x77/0xd0 [openvswitch]
+ ? __htab_map_lookup_elem+0x4e/0x60
+ ? bpf_prog_680e8aff8547aec1_kfree+0x3b/0x714
+ ? trace_call_bpf+0xc8/0x150
+ ? kfree+0x1/0x250
+ ? kfree+0x1/0x250
+ ? kprobe_perf_func+0x4f/0x2b0
+ ? kprobe_perf_func+0x4f/0x2b0
+ ? __mod_memcg_lruvec_state+0x63/0xe0
+ netdev_port_receive+0xc4/0x180 [openvswitch]
+ ? netdev_port_receive+0x180/0x180 [openvswitch]
+ netdev_frame_hook+0x1f/0x40 [openvswitch]
+ __netif_receive_skb_core.constprop.0+0x23d/0xf00
+ __netif_receive_skb_one_core+0x3f/0xa0
+ __netif_receive_skb+0x15/0x60
+ process_backlog+0x9e/0x170
+ __napi_poll+0x33/0x180
+ net_rx_action+0x126/0x280
+ ? ttwu_do_activate+0x72/0xf0
+ __do_softirq+0xd9/0x2e7
+ ? rcu_report_exp_cpu_mult+0x1b0/0x1b0
+ do_softirq+0x7d/0xb0
+ </IRQ>
+ <TASK>
+ __local_bh_enable_ip+0x54/0x60
+ ip_finish_output2+0x191/0x460
+ __ip_finish_output+0xb7/0x180
+ ip_finish_output+0x2e/0xc0
+ ip_output+0x78/0x100
+ ? __ip_finish_output+0x180/0x180
+ ip_local_out+0x5e/0x70
+ __ip_queue_xmit+0x184/0x440
+ ? tcp_syn_options+0x1f9/0x300
+ ip_queue_xmit+0x15/0x20
+ __tcp_transmit_skb+0x910/0x9c0
+ ? __mod_memcg_state+0x44/0xa0
+ tcp_connect+0x437/0x4e0
+ ? ktime_get_with_offset+0x60/0xf0
+ tcp_v4_connect+0x436/0x530
+ __inet_stream_connect+0xd4/0x3a0
+ ? kprobe_perf_func+0x4f/0x2b0
+ ? aa_sk_perm+0x43/0x1c0
+ inet_stream_connect+0x3b/0x60
+ __sys_connect_file+0x63/0x70
+ __sys_connect+0xa6/0xd0
+ ? setfl+0x108/0x170
+ ? do_fcntl+0xe8/0x5a0
+ __x64_sys_connect+0x18/0x20
+ do_syscall_64+0x5c/0xc0
+ ? __x64_sys_fcntl+0xa9/0xd0
+ ? exit_to_user_mode_prepare+0x37/0xb0
+ ? syscall_exit_to_user_mode+0x27/0x50
+ ? do_syscall_64+0x69/0xc0
+ ? __sys_setsockopt+0xea/0x1e0
+ ? exit_to_user_mode_prepare+0x37/0xb0
+ ? syscall_exit_to_user_mode+0x27/0x50
+ ? __x64_sys_setsockopt+0x1f/0x30
+ ? do_syscall_64+0x69/0xc0
+ ? irqentry_exit+0x1d/0x30
+ ? exc_page_fault+0x89/0x170
+ entry_SYSCALL_64_after_hwframe+0x61/0xcb
+RIP: 0033:0x7f7b8101c6a7
+Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89
+RSP: 002b:00007ffffd6b2198 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b8101c6a7
+RDX: 0000000000000010 RSI: 00007ffffd6b2360 RDI: 0000000000000005
+RBP: 0000561f1370d560 R08: 00002795ad21d1ac R09: 0030312e302e302e
+R10: 00007ffffd73f080 R11: 0000000000000246 R12: 0000561f1370c410
+R13: 0000000000000000 R14: 0000000000000005 R15: 0000000000000000
+ </TASK>
+
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Co-developed-by: Luca Czesla <luca.czesla@mail.schwarz>
+Signed-off-by: Luca Czesla <luca.czesla@mail.schwarz>
+Signed-off-by: Felix Huettner <felix.huettner@mail.schwarz>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/ZC0pBXBAgh7c76CA@kernel-bug-kernel-bug
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Carlos Soto <carlos.soto@broadcom.com>
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 1 +
+ net/openvswitch/actions.c | 2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3186,6 +3186,7 @@ static u16 skb_tx_hash(const struct net_
+ }
+
+ if (skb_rx_queue_recorded(skb)) {
++ BUILD_BUG_ON_INVALID(qcount == 0);
+ hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -912,7 +912,7 @@ static void do_output(struct datapath *d
+ {
+ struct vport *vport = ovs_vport_rcu(dp, out_port);
+
+- if (likely(vport)) {
++ if (likely(vport && netif_carrier_ok(vport->dev))) {
+ u16 mru = OVS_CB(skb)->mru;
+ u32 cutlen = OVS_CB(skb)->cutlen;
+
--- /dev/null
+From 47e55e4b410f7d552e43011baa5be1aab4093990 Mon Sep 17 00:00:00 2001
+From: Ilya Maximets <i.maximets@ovn.org>
+Date: Thu, 9 Jan 2025 13:21:24 +0100
+Subject: openvswitch: fix lockup on tx to unregistering netdev with carrier
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+commit 47e55e4b410f7d552e43011baa5be1aab4093990 upstream.
+
+Commit in a fixes tag attempted to fix the issue in the following
+sequence of calls:
+
+ do_output
+ -> ovs_vport_send
+ -> dev_queue_xmit
+ -> __dev_queue_xmit
+ -> netdev_core_pick_tx
+ -> skb_tx_hash
+
+When device is unregistering, the 'dev->real_num_tx_queues' goes to
+zero and the 'while (unlikely(hash >= qcount))' loop inside the
+'skb_tx_hash' becomes infinite, locking up the core forever.
+
+But unfortunately, checking just the carrier status is not enough to
+fix the issue, because some devices may still be in unregistering
+state while reporting carrier status OK.
+
+One example of such a device is net/dummy. It sets carrier ON
+on start, but it doesn't implement .ndo_stop to set the carrier off.
+And it makes sense, because dummy doesn't really have a carrier.
+Therefore, while this device is unregistering, it's still easy to hit
+the infinite loop in the skb_tx_hash() from the OVS datapath. There
+might be other drivers that do the same, but dummy by itself is
+important for the OVS ecosystem, because it is frequently used as a
+packet sink for tcpdump while debugging OVS deployments. And when the
+issue is hit, the only way to recover is to reboot.
+
+Fix that by also checking if the device is running. The running
+state is handled by the net core during unregistering, so it covers
+unregistering case better, and we don't really need to send packets
+to devices that are not running anyway.
+
+While only checking the running state might be enough, the carrier
+check is preserved. The running and the carrier states seem disjoint
+throughout the code and different drivers. And other core functions
+like __dev_direct_xmit() check both before attempting to transmit
+a packet. So, it seems safer to check both flags in OVS as well.
+
+Fixes: 066b86787fa3 ("net: openvswitch: fix race on port output")
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Closes: https://mail.openvswitch.org/pipermail/ovs-discuss/2025-January/053423.html
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+Reviewed-by: Aaron Conole <aconole@redhat.com>
+Link: https://patch.msgid.link/20250109122225.4034688-1-i.maximets@ovn.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Carlos Soto <carlos.soto@broadcom.com>
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/actions.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -912,7 +912,9 @@ static void do_output(struct datapath *d
+ {
+ struct vport *vport = ovs_vport_rcu(dp, out_port);
+
+- if (likely(vport && netif_carrier_ok(vport->dev))) {
++ if (likely(vport &&
++ netif_running(vport->dev) &&
++ netif_carrier_ok(vport->dev))) {
+ u16 mru = OVS_CB(skb)->mru;
+ u32 cutlen = OVS_CB(skb)->cutlen;
+
smb-client-fix-null-ptr-deref-in-crypto_aead_setkey.patch
bpf-avoid-holding-freeze_mutex-during-mmap-operation.patch
bpf-check-rcu_read_lock_trace_held-before-calling-bpf-map-helpers.patch
+blk-cgroup-support-to-track-if-policy-is-online.patch
+blk-iocost-do-not-warn-if-iocg-was-already-offlined.patch
+ext4-fix-timer-use-after-free-on-failed-mount.patch
+mmc-sdhci-brcmstb-add-ability-to-increase-max-clock-rate-for-72116b0.patch
+mmc-sdhci-brcmstb-initialize-base_clk-to-null-in-sdhci_brcmstb_probe.patch
+mmc-sdhci-brcmstb-use-clk_get_rate-base_clk-in-pm-resume.patch
+net-mlx5e-fix-use-after-free-of-encap-entry-in-neigh-update-handler.patch
+ipvs-properly-dereference-pe-in-ip_vs_add_service.patch
+net-openvswitch-fix-race-on-port-output.patch
+openvswitch-fix-lockup-on-tx-to-unregistering-netdev-with-carrier.patch