--- /dev/null
+From 519054b3d9d59cf90ea518d6ad6a11f05fb35b3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:32:46 +0100
+Subject: ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071
+ tablet
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ]
+
+The Advantech MICA-071 tablet deviates from the defaults for
+a non CR Bay Trail based tablet in several ways:
+
+1. It uses an analog MIC on IN3 rather then using DMIC1
+2. It only has 1 speaker
+3. It needs the OVCD current threshold to be set to 1500uA instead of
+ the default 2000uA to reliable differentiate between headphones vs
+ headsets
+
+Add a quirk with these settings for this tablet.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
+index 7830d014d924..6a8edb0a559d 100644
+--- a/sound/soc/intel/boards/bytcr_rt5640.c
++++ b/sound/soc/intel/boards/bytcr_rt5640.c
+@@ -428,6 +428,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
+ },
++ {
++ /* Advantech MICA-071 */
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"),
++ },
++ /* OVCD Th = 1500uA to reliable detect head-phones vs -set */
++ .driver_data = (void *)(BYT_RT5640_IN3_MAP |
++ BYT_RT5640_JD_SRC_JD2_IN4N |
++ BYT_RT5640_OVCD_TH_1500UA |
++ BYT_RT5640_OVCD_SF_0P75 |
++ BYT_RT5640_MONO_SPEAKER |
++ BYT_RT5640_DIFF_MIC |
++ BYT_RT5640_MCLK_EN),
++ },
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+--
+2.35.1
+
--- /dev/null
+From e8522a77d18724fa5fbc00e97a697aa6264d3e14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 16:47:00 -0800
+Subject: bpf: pull before calling skb_postpull_rcsum()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ]
+
+Anand hit a BUG() when pulling off headers on egress to a SW tunnel.
+We get to skb_checksum_help() with an invalid checksum offset
+(commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()")
+converted those BUGs to WARN_ONs()).
+He points out oddness in how skb_postpull_rcsum() gets used.
+Indeed looks like we should pull before "postpull", otherwise
+the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not
+be able to do its job:
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start_offset(skb) < 0)
+ skb->ip_summed = CHECKSUM_NONE;
+
+Reported-by: Anand Parthasarathy <anpartha@meta.com>
+Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index ec4f7e68b21a..71fcb4e7edae 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2795,15 +2795,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+
+ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+ {
++ void *old_data;
++
+ /* skb_ensure_writable() is not needed here, as we're
+ * already working on an uncloned skb.
+ */
+ if (unlikely(!pskb_may_pull(skb, off + len)))
+ return -ENOMEM;
+
+- skb_postpull_rcsum(skb, skb->data + off, len);
+- memmove(skb->data + len, skb->data, off);
++ old_data = skb->data;
+ __skb_pull(skb, len);
++ skb_postpull_rcsum(skb, old_data + off, len);
++ memmove(skb->data, old_data, off);
+
+ return 0;
+ }
+--
+2.35.1
+
--- /dev/null
+From bc57e7edd2dee1b788d8192eb049fca24c4bab97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 14:51:46 +0800
+Subject: caif: fix memory leak in cfctrl_linkup_request()
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit fe69230f05897b3de758427b574fc98025dfc907 ]
+
+When linktype is unknown or kzalloc failed in cfctrl_linkup_request(),
+pkt is not released. Add release process to error path.
+
+Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack")
+Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/caif/cfctrl.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
+index 2809cbd6b7f7..d8cb4b2a076b 100644
+--- a/net/caif/cfctrl.c
++++ b/net/caif/cfctrl.c
+@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
+ default:
+ pr_warn("Request setup of bad link type = %d\n",
+ param->linktype);
++ cfpkt_destroy(pkt);
+ return -EINVAL;
+ }
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+- if (!req)
++ if (!req) {
++ cfpkt_destroy(pkt);
+ return -ENOMEM;
++ }
++
+ req->client_layer = user_layer;
+ req->cmd = CFCTRL_CMD_LINK_SETUP;
+ req->param = *param;
+--
+2.35.1
+
--- /dev/null
+From ac99413d3a22b5c9db7fc55f0ecca8d9798a622f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Nov 2022 10:09:45 +0800
+Subject: dm thin: resume even if in FAIL mode
+
+From: Luo Meng <luomeng12@huawei.com>
+
+[ Upstream commit 19eb1650afeb1aa86151f61900e9e5f1de5d8d02 ]
+
+If a thinpool set fail_io while suspending, resume will fail with:
+ device-mapper: resume ioctl on vg-thinpool failed: Invalid argument
+
+The thin-pool also can't be removed if an in-flight bio is in the
+deferred list.
+
+This can be easily reproduced using:
+
+ echo "offline" > /sys/block/sda/device/state
+ dd if=/dev/zero of=/dev/mapper/thin bs=4K count=1
+ dmsetup suspend /dev/mapper/pool
+ mkfs.ext4 /dev/mapper/thin
+ dmsetup resume /dev/mapper/pool
+
+The root cause is maybe_resize_data_dev() will check fail_io and return
+error before called dm_resume.
+
+Fix this by adding FAIL mode check at the end of pool_preresume().
+
+Cc: stable@vger.kernel.org
+Fixes: da105ed5fd7e ("dm thin metadata: introduce dm_pool_abort_metadata")
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-thin.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
+index 1af5873923e8..4f161725dda0 100644
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -3593,23 +3593,31 @@ static int pool_preresume(struct dm_target *ti)
+ */
+ r = bind_control_target(pool, ti);
+ if (r)
+- return r;
++ goto out;
+
+ dm_pool_register_pre_commit_callback(pool->pmd,
+ metadata_pre_commit_callback, pt);
+
+ r = maybe_resize_data_dev(ti, &need_commit1);
+ if (r)
+- return r;
++ goto out;
+
+ r = maybe_resize_metadata_dev(ti, &need_commit2);
+ if (r)
+- return r;
++ goto out;
+
+ if (need_commit1 || need_commit2)
+ (void) commit(pool);
++out:
++ /*
++ * When a thin-pool is PM_FAIL, it cannot be rebuilt if
++ * bio is in deferred list. Therefore need to return 0
++ * to allow pool_resume() to flush IO.
++ */
++ if (r && get_pool_mode(pool) == PM_FAIL)
++ r = 0;
+
+- return 0;
++ return r;
+ }
+
+ static void pool_suspend_active_thins(struct pool *pool)
+--
+2.35.1
+
--- /dev/null
+From e507b1ffd90ffc541fe254be4b63d4f3ff420a7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2020 05:08:23 +0000
+Subject: driver core: Fix driver_deferred_probe_check_state() logic
+
+From: John Stultz <john.stultz@linaro.org>
+
+[ Upstream commit c8c43cee29f6ca2575c953ae600263690db28f41 ]
+
+driver_deferred_probe_check_state() has some uninituitive behavior.
+
+* From boot to late_initcall, it returns -EPROBE_DEFER
+
+* From late_initcall to the deferred_probe_timeout (if set)
+ it returns -ENODEV
+
+* If the deferred_probe_timeout it set, after it fires, it
+ returns -ETIMEDOUT
+
+This is a bit confusing, as its useful to have the function
+return -EPROBE_DEFER while the timeout is still running. This
+behavior has resulted in the somwhat duplicative
+driver_deferred_probe_check_state_continue() function being
+added.
+
+Thus this patch tries to improve the logic, so that it behaves
+as such:
+
+* If late_initcall has passed, and modules are not enabled
+ it returns -ENODEV
+
+* If modules are enabled and deferred_probe_timeout is set,
+ it returns -EPROBE_DEFER until the timeout, afterwhich it
+ returns -ETIMEDOUT.
+
+* In all other cases, it returns -EPROBE_DEFER
+
+This will make the deferred_probe_timeout value much more
+functional, and will allow us to consolidate the
+driver_deferred_probe_check_state() and
+driver_deferred_probe_check_state_continue() logic in a later
+patch.
+
+Cc: linux-pm@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Linus Walleij <linus.walleij@linaro.org>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Liam Girdwood <lgirdwood@gmail.com>
+Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
+Cc: Saravana Kannan <saravanak@google.com>
+Cc: Todd Kjos <tkjos@google.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Pavel Machek <pavel@ucw.cz>
+Cc: Ulf Hansson <ulf.hansson@linaro.org>
+Cc: Kevin Hilman <khilman@kernel.org>
+Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+Cc: Rob Herring <robh@kernel.org>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Link: https://lore.kernel.org/r/20200225050828.56458-2-john.stultz@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/dd.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c
+index 10063d8a1b7d..2a28e07d2e0b 100644
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -237,24 +237,26 @@ __setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
+
+ static int __driver_deferred_probe_check_state(struct device *dev)
+ {
+- if (!initcalls_done)
+- return -EPROBE_DEFER;
++ if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done)
++ return -ENODEV;
+
+ if (!deferred_probe_timeout) {
+ dev_WARN(dev, "deferred probe timeout, ignoring dependency");
+ return -ETIMEDOUT;
+ }
+
+- return 0;
++ return -EPROBE_DEFER;
+ }
+
+ /**
+ * driver_deferred_probe_check_state() - Check deferred probe state
+ * @dev: device to check
+ *
+- * Returns -ENODEV if init is done and all built-in drivers have had a chance
+- * to probe (i.e. initcalls are done), -ETIMEDOUT if deferred probe debug
+- * timeout has expired, or -EPROBE_DEFER if none of those conditions are met.
++ * Return:
++ * -ENODEV if initcalls have completed and modules are disabled.
++ * -ETIMEDOUT if the deferred probe timeout was set and has expired
++ * and modules are enabled.
++ * -EPROBE_DEFER in other cases.
+ *
+ * Drivers or subsystems can opt-in to calling this function instead of directly
+ * returning -EPROBE_DEFER.
+@@ -264,7 +266,7 @@ int driver_deferred_probe_check_state(struct device *dev)
+ int ret;
+
+ ret = __driver_deferred_probe_check_state(dev);
+- if (ret < 0)
++ if (ret != -ENODEV)
+ return ret;
+
+ dev_warn(dev, "ignoring dependency for device, assuming no driver");
+@@ -292,7 +294,7 @@ int driver_deferred_probe_check_state_continue(struct device *dev)
+ int ret;
+
+ ret = __driver_deferred_probe_check_state(dev);
+- if (ret < 0)
++ if (ret != -ENODEV)
+ return ret;
+
+ return -EPROBE_DEFER;
+--
+2.35.1
+
--- /dev/null
+From cae2bae22ae13e0c31293862dc9addea3118695e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2020 05:08:24 +0000
+Subject: driver core: Set deferred_probe_timeout to a longer default if
+ CONFIG_MODULES is set
+
+From: John Stultz <john.stultz@linaro.org>
+
+[ Upstream commit e2cec7d6853712295cef5377762165a489b2957f ]
+
+When using modules, its common for the modules not to be loaded
+until quite late by userland. With the current code,
+driver_deferred_probe_check_state() will stop returning
+EPROBE_DEFER after late_initcall, which can cause module
+dependency resolution to fail after that.
+
+So allow a longer window of 30 seconds (picked somewhat
+arbitrarily, but influenced by the similar regulator core
+timeout value) in the case where modules are enabled.
+
+Cc: linux-pm@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Linus Walleij <linus.walleij@linaro.org>
+Cc: Thierry Reding <treding@nvidia.com>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Liam Girdwood <lgirdwood@gmail.com>
+Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
+Cc: Saravana Kannan <saravanak@google.com>
+Cc: Todd Kjos <tkjos@google.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Pavel Machek <pavel@ucw.cz>
+Cc: Ulf Hansson <ulf.hansson@linaro.org>
+Cc: Kevin Hilman <khilman@kernel.org>
+Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+Cc: Rob Herring <robh@kernel.org>
+Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Link: https://lore.kernel.org/r/20200225050828.56458-3-john.stultz@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/dd.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/base/dd.c b/drivers/base/dd.c
+index 2a28e07d2e0b..490cffffcae4 100644
+--- a/drivers/base/dd.c
++++ b/drivers/base/dd.c
+@@ -224,7 +224,16 @@ static int deferred_devs_show(struct seq_file *s, void *data)
+ }
+ DEFINE_SHOW_ATTRIBUTE(deferred_devs);
+
++#ifdef CONFIG_MODULES
++/*
++ * In the case of modules, set the default probe timeout to
++ * 30 seconds to give userland some time to load needed modules
++ */
++static int deferred_probe_timeout = 30;
++#else
++/* In the case of !modules, no probe timeout needed */
+ static int deferred_probe_timeout = -1;
++#endif
+ static int __init deferred_probe_timeout_setup(char *str)
+ {
+ int timeout;
+--
+2.35.1
+
--- /dev/null
+From 957709588ecf1922dd0ea67c5aa8d5c7a71c5e6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 12:53:35 +0300
+Subject: drivers/net/bonding/bond_3ad: return when there's no aggregator
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ]
+
+Otherwise we would dereference a NULL aggregator pointer when calling
+__set_agg_ports_ready on the line below.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_3ad.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 0d6cd2a4cc41..0c4e6fcac58e 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -1529,6 +1529,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
+ slave_err(bond->dev, port->slave->dev,
+ "Port %d did not find a suitable aggregator\n",
+ port->actor_port_number);
++ return;
+ }
+ }
+ /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
+--
+2.35.1
+
--- /dev/null
+From ac32335c8c7724003ad254073e7dedbeee1cd13a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 16:15:18 +0300
+Subject: drm/i915: unpin on error in intel_vgpu_shadow_mm_pin()
+
+From: Dan Carpenter <error27@gmail.com>
+
+[ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ]
+
+Call intel_vgpu_unpin_mm() on this error path.
+
+Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili
+Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/scheduler.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
+index 058dcd541644..c1dc225d8436 100644
+--- a/drivers/gpu/drm/i915/gvt/scheduler.c
++++ b/drivers/gpu/drm/i915/gvt/scheduler.c
+@@ -632,6 +632,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
+
+ if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+ !workload->shadow_mm->ppgtt_mm.shadowed) {
++ intel_vgpu_unpin_mm(workload->shadow_mm);
+ gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+ return -EINVAL;
+ }
+--
+2.35.1
+
--- /dev/null
+From 04c215fe03ab45d3fbd55d6d4cb077f0828b777e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 15:43:43 +0800
+Subject: ext4: correct inconsistent error msg in nojournal mode
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 89481b5fa8c0640e62ba84c6020cee895f7ac643 ]
+
+When we used the journal_async_commit mounting option in nojournal mode,
+the kernel told me that "can't mount with journal_checksum", was very
+confusing. I find that when we mount with journal_async_commit, both the
+JOURNAL_ASYNC_COMMIT and EXPLICIT_JOURNAL_CHECKSUM flags are set. However,
+in the error branch, CHECKSUM is checked before ASYNC_COMMIT. As a result,
+the above inconsistency occurs, and the ASYNC_COMMIT branch becomes dead
+code that cannot be executed. Therefore, we exchange the positions of the
+two judgments to make the error msg more accurate.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20221109074343.4184862-1-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index e3e616c55d0d..932c1619cea5 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4391,14 +4391,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount3a;
+ } else {
+ /* Nojournal mode, all journal mount options are illegal */
+- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
++ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+- "journal_checksum, fs mounted w/o journal");
++ "journal_async_commit, fs mounted w/o journal");
+ goto failed_mount3a;
+ }
+- if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
++
++ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+- "journal_async_commit, fs mounted w/o journal");
++ "journal_checksum, fs mounted w/o journal");
+ goto failed_mount3a;
+ }
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+--
+2.35.1
+
--- /dev/null
+From 52350ff80a6fee8019734d107d455f9a4abefec2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Nov 2022 20:39:50 +0100
+Subject: ext4: fix deadlock due to mbcache entry corruption
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd ]
+
+When manipulating xattr blocks, we can deadlock infinitely looping
+inside ext4_xattr_block_set() where we constantly keep finding xattr
+block for reuse in mbcache but we are unable to reuse it because its
+reference count is too big. This happens because cache entry for the
+xattr block is marked as reusable (e_reusable set) although its
+reference count is too big. When this inconsistency happens, this
+inconsistent state is kept indefinitely and so ext4_xattr_block_set()
+keeps retrying indefinitely.
+
+The inconsistent state is caused by non-atomic update of e_reusable bit.
+e_reusable is part of a bitfield and e_reusable update can race with
+update of e_referenced bit in the same bitfield resulting in loss of one
+of the updates. Fix the problem by using atomic bitops instead.
+
+This bug has been around for many years, but it became *much* easier
+to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr
+blocks").
+
+Cc: stable@vger.kernel.org
+Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
+Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks")
+Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
+Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
+Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 4 ++--
+ fs/mbcache.c | 14 ++++++++------
+ include/linux/mbcache.h | 9 +++++++--
+ 3 files changed, 17 insertions(+), 10 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 131de3fcd2be..78df2d65998e 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1293,7 +1293,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ ce = mb_cache_entry_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+- ce->e_reusable = 1;
++ set_bit(MBE_REUSABLE_B, &ce->e_flags);
+ mb_cache_entry_put(ea_block_cache, ce);
+ }
+ }
+@@ -2054,7 +2054,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ }
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
+- ce->e_reusable = 0;
++ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
+ ea_bdebug(new_bh, "reusing; refcount now=%d",
+ ref);
+ ext4_xattr_block_csum_set(inode, new_bh);
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 950f1829a7fd..7a12ae87c806 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_value = value;
+- entry->e_reusable = reusable;
+- entry->e_referenced = 0;
++ entry->e_flags = 0;
++ if (reusable)
++ set_bit(MBE_REUSABLE_B, &entry->e_flags);
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+@@ -162,7 +163,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb_cache_entry,
+ e_hash_list);
+- if (entry->e_key == key && entry->e_reusable &&
++ if (entry->e_key == key &&
++ test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+ atomic_inc_not_zero(&entry->e_refcnt))
+ goto out;
+ node = node->next;
+@@ -318,7 +320,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+ void mb_cache_entry_touch(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
+ {
+- entry->e_referenced = 1;
++ set_bit(MBE_REFERENCED_B, &entry->e_flags);
+ }
+ EXPORT_SYMBOL(mb_cache_entry_touch);
+
+@@ -343,9 +345,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+ /* Drop initial hash reference if there is no user */
+- if (entry->e_referenced ||
++ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+- entry->e_referenced = 0;
++ clear_bit(MBE_REFERENCED_B, &entry->e_flags);
+ list_move_tail(&entry->e_list, &cache->c_list);
+ continue;
+ }
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index e9d5ece87794..591bc4cefe1d 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -10,6 +10,12 @@
+
+ struct mb_cache;
+
++/* Cache entry flags */
++enum {
++ MBE_REFERENCED_B = 0,
++ MBE_REUSABLE_B
++};
++
+ struct mb_cache_entry {
+ /* List of entries in cache - protected by cache->c_list_lock */
+ struct list_head e_list;
+@@ -26,8 +32,7 @@ struct mb_cache_entry {
+ atomic_t e_refcnt;
+ /* Key in hash - stable during lifetime of the entry */
+ u32 e_key;
+- u32 e_referenced:1;
+- u32 e_reusable:1;
++ unsigned long e_flags;
+ /* User provided value - stable during lifetime of the entry */
+ u64 e_value;
+ };
+--
+2.35.1
+
--- /dev/null
+From 1c924039e9d8cb8f9964dd8a5f5c3a811bd37cab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:24 +0200
+Subject: ext4: fix race when reusing xattr blocks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ]
+
+When ext4_xattr_block_set() decides to remove xattr block the following
+race can happen:
+
+CPU1 CPU2
+ext4_xattr_block_set() ext4_xattr_release_block()
+ new_bh = ext4_xattr_block_cache_find()
+
+ lock_buffer(bh);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+ if (ref == 1) {
+ ...
+ mb_cache_entry_delete();
+ unlock_buffer(bh);
+ ext4_free_blocks();
+ ...
+ ext4_forget(..., bh, ...);
+ jbd2_journal_revoke(..., bh);
+
+ ext4_journal_get_write_access(..., new_bh, ...)
+ do_get_write_access()
+ jbd2_journal_cancel_revoke(..., new_bh);
+
+Later the code in ext4_xattr_block_set() finds out the block got freed
+and cancels reusal of the block but the revoke stays canceled and so in
+case of block reuse and journal replay the filesystem can get corrupted.
+If the race works out slightly differently, we can also hit assertions
+in the jbd2 code.
+
+Fix the problem by making sure that once matching mbcache entry is
+found, code dropping the last xattr block reference (or trying to modify
+xattr block in place) waits until the mbcache entry reference is
+dropped. This way code trying to reuse xattr block is protected from
+someone trying to drop the last reference to xattr block.
+
+Reported-and-tested-by: Ritesh Harjani <ritesh.list@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 45 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 24a4396933c0..131de3fcd2be 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -437,9 +437,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ /* Remove entry from mbcache when EA inode is getting evicted */
+ void ext4_evict_ea_inode(struct inode *inode)
+ {
+- if (EA_INODE_CACHE(inode))
+- mb_cache_entry_delete(EA_INODE_CACHE(inode),
+- ext4_xattr_inode_get_hash(inode), inode->i_ino);
++ struct mb_cache_entry *oe;
++
++ if (!EA_INODE_CACHE(inode))
++ return;
++ /* Wait for entry to get unused so that we can remove it */
++ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
++ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
++ mb_cache_entry_wait_unused(oe);
++ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
++ }
+ }
+
+ static int
+@@ -1241,6 +1248,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ if (error)
+ goto out;
+
++retry_ref:
+ lock_buffer(bh);
+ hash = le32_to_cpu(BHDR(bh)->h_hash);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+@@ -1250,9 +1258,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+- if (ea_block_cache)
+- mb_cache_entry_delete(ea_block_cache, hash,
+- bh->b_blocknr);
++ if (ea_block_cache) {
++ struct mb_cache_entry *oe;
++
++ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
++ bh->b_blocknr);
++ if (oe) {
++ unlock_buffer(bh);
++ mb_cache_entry_wait_unused(oe);
++ mb_cache_entry_put(ea_block_cache, oe);
++ goto retry_ref;
++ }
++ }
+ get_bh(bh);
+ unlock_buffer(bh);
+
+@@ -1879,9 +1896,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+- if (ea_block_cache)
+- mb_cache_entry_delete(ea_block_cache, hash,
+- bs->bh->b_blocknr);
++ if (ea_block_cache) {
++ struct mb_cache_entry *oe;
++
++ oe = mb_cache_entry_delete_or_get(ea_block_cache,
++ hash, bs->bh->b_blocknr);
++ if (oe) {
++ /*
++ * Xattr block is getting reused. Leave
++ * it alone.
++ */
++ mb_cache_entry_put(ea_block_cache, oe);
++ goto clone_block;
++ }
++ }
+ ea_bdebug(bs->bh, "modifying in-place");
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+ true /* is_block */);
+@@ -1897,6 +1925,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ goto cleanup;
+ goto inserted;
+ }
++clone_block:
+ unlock_buffer(bs->bh);
+ ea_bdebug(bs->bh, "cloning");
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+@@ -2002,18 +2031,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ lock_buffer(new_bh);
+ /*
+ * We have to be careful about races with
+- * freeing, rehashing or adding references to
+- * xattr block. Once we hold buffer lock xattr
+- * block's state is stable so we can check
+- * whether the block got freed / rehashed or
+- * not. Since we unhash mbcache entry under
+- * buffer lock when freeing / rehashing xattr
+- * block, checking whether entry is still
+- * hashed is reliable. Same rules hold for
+- * e_reusable handling.
++ * adding references to xattr block. Once we
++ * hold buffer lock xattr block's state is
++ * stable so we can check the additional
++ * reference fits.
+ */
+- if (hlist_bl_unhashed(&ce->e_hash_list) ||
+- !ce->e_reusable) {
++ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
++ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+@@ -2028,9 +2052,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ new_bh = NULL;
+ goto inserted;
+ }
+- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
++ if (ref == EXT4_XATTR_REFCOUNT_MAX)
+ ce->e_reusable = 0;
+ ea_bdebug(new_bh, "reusing; refcount now=%d",
+ ref);
+--
+2.35.1
+
--- /dev/null
+From 85bcc3077f205090a967f47e87165324a172359d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Sep 2022 22:15:12 +0800
+Subject: ext4: goto right label 'failed_mount3a'
+
+From: Jason Yan <yanaijie@huawei.com>
+
+[ Upstream commit 43bd6f1b49b61f43de4d4e33661b8dbe8c911f14 ]
+
+Before these two branches neither loaded the journal nor created the
+xattr cache. So the right label to goto is 'failed_mount3a'. Although
+this did not cause any issues because the error handler validated if the
+pointer is null. However this still made me confused when reading
+the code. So it's still worth to modify to goto the right label.
+
+Signed-off-by: Jason Yan <yanaijie@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220916141527.1012715-2-yanaijie@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 89481b5fa8c0 ("ext4: correct inconsistent error msg in nojournal mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 5fdf584101e6..e3e616c55d0d 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4388,30 +4388,30 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ ext4_has_feature_journal_needs_recovery(sb)) {
+ ext4_msg(sb, KERN_ERR, "required journal recovery "
+ "suppressed and not mounted read-only");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ } else {
+ /* Nojournal mode, all journal mount options are illegal */
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_checksum, fs mounted w/o journal");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit, fs mounted w/o journal");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "commit=%lu, fs mounted w/o journal",
+ sbi->s_commit_interval / HZ);
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ if (EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "data=, fs mounted w/o journal");
+- goto failed_mount_wq;
++ goto failed_mount3a;
+ }
+ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
+ clear_opt(sb, JOURNAL_CHECKSUM);
+--
+2.35.1
+
--- /dev/null
+From 9fe868e42d4ee415f73237f1bc063c2467deb6f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:22 +0200
+Subject: ext4: remove EA inode entry from mbcache on inode eviction
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 6bc0d63dad7f9f54d381925ee855b402f652fa39 ]
+
+Currently we remove EA inode from mbcache as soon as its xattr refcount
+drops to zero. However there can be pending attempts to reuse the inode
+and thus refcount handling code has to handle the situation when
+refcount increases from zero anyway. So save some work and just keep EA
+inode in mbcache until it is getting evicted. At that moment we are sure
+following iget() of EA inode will fail anyway (or wait for eviction to
+finish and load things from the disk again) and so removing mbcache
+entry at that moment is fine and simplifies the code a bit.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 2 ++
+ fs/ext4/xattr.c | 24 ++++++++----------------
+ fs/ext4/xattr.h | 1 +
+ 3 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index a26e5ed6d61c..b38427b8d083 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -207,6 +207,8 @@ void ext4_evict_inode(struct inode *inode)
+
+ trace_ext4_evict_inode(inode);
+
++ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
++ ext4_evict_ea_inode(inode);
+ if (inode->i_nlink) {
+ /*
+ * When journalling data dirty buffers are tracked only in the
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 8f0e8b60ea20..4ade87a32315 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -434,6 +434,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ return err;
+ }
+
++/* Remove entry from mbcache when EA inode is getting evicted */
++void ext4_evict_ea_inode(struct inode *inode)
++{
++ if (EA_INODE_CACHE(inode))
++ mb_cache_entry_delete(EA_INODE_CACHE(inode),
++ ext4_xattr_inode_get_hash(inode), inode->i_ino);
++}
++
+ static int
+ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+ struct ext4_xattr_entry *entry, void *buffer,
+@@ -1019,10 +1027,8 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
+ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+ int ref_change)
+ {
+- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+ struct ext4_iloc iloc;
+ s64 ref_count;
+- u32 hash;
+ int ret;
+
+ inode_lock(ea_inode);
+@@ -1045,14 +1051,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+
+ set_nlink(ea_inode, 1);
+ ext4_orphan_del(handle, ea_inode);
+-
+- if (ea_inode_cache) {
+- hash = ext4_xattr_inode_get_hash(ea_inode);
+- mb_cache_entry_create(ea_inode_cache,
+- GFP_NOFS, hash,
+- ea_inode->i_ino,
+- true /* reusable */);
+- }
+ }
+ } else {
+ WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+@@ -1065,12 +1063,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+
+ clear_nlink(ea_inode);
+ ext4_orphan_add(handle, ea_inode);
+-
+- if (ea_inode_cache) {
+- hash = ext4_xattr_inode_get_hash(ea_inode);
+- mb_cache_entry_delete(ea_inode_cache, hash,
+- ea_inode->i_ino);
+- }
+ }
+ }
+
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index 990084e00374..231ef308d10c 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -190,6 +190,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+
+ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+ struct ext4_inode *raw_inode, handle_t *handle);
++extern void ext4_evict_ea_inode(struct inode *inode);
+
+ extern const struct xattr_handler *ext4_xattr_handlers[];
+
+--
+2.35.1
+
--- /dev/null
+From 263e64afed205ce0446d5281716cf8d4b564408c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:23 +0200
+Subject: ext4: unindent codeblock in ext4_xattr_block_set()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit fd48e9acdf26d0cbd80051de07d4a735d05d29b2 ]
+
+Remove unnecessary else (and thus indentation level) from a code block
+in ext4_xattr_block_set(). It will also make following code changes
+easier. No functional changes.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 77 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 39 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 4ade87a32315..24a4396933c0 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1863,6 +1863,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ #define header(x) ((struct ext4_xattr_header *)(x))
+
+ if (s->base) {
++ int offset = (char *)s->here - bs->bh->b_data;
++
+ BUFFER_TRACE(bs->bh, "get_write_access");
+ error = ext4_journal_get_write_access(handle, bs->bh);
+ if (error)
+@@ -1894,49 +1896,46 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ if (error)
+ goto cleanup;
+ goto inserted;
+- } else {
+- int offset = (char *)s->here - bs->bh->b_data;
++ }
++ unlock_buffer(bs->bh);
++ ea_bdebug(bs->bh, "cloning");
++ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
++ error = -ENOMEM;
++ if (s->base == NULL)
++ goto cleanup;
++ s->first = ENTRY(header(s->base)+1);
++ header(s->base)->h_refcount = cpu_to_le32(1);
++ s->here = ENTRY(s->base + offset);
++ s->end = s->base + bs->bh->b_size;
+
+- unlock_buffer(bs->bh);
+- ea_bdebug(bs->bh, "cloning");
+- s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+- error = -ENOMEM;
+- if (s->base == NULL)
++ /*
++ * If existing entry points to an xattr inode, we need
++ * to prevent ext4_xattr_set_entry() from decrementing
++ * ref count on it because the reference belongs to the
++ * original block. In this case, make the entry look
++ * like it has an empty value.
++ */
++ if (!s->not_found && s->here->e_value_inum) {
++ ea_ino = le32_to_cpu(s->here->e_value_inum);
++ error = ext4_xattr_inode_iget(inode, ea_ino,
++ le32_to_cpu(s->here->e_hash),
++ &tmp_inode);
++ if (error)
+ goto cleanup;
+- s->first = ENTRY(header(s->base)+1);
+- header(s->base)->h_refcount = cpu_to_le32(1);
+- s->here = ENTRY(s->base + offset);
+- s->end = s->base + bs->bh->b_size;
+
+- /*
+- * If existing entry points to an xattr inode, we need
+- * to prevent ext4_xattr_set_entry() from decrementing
+- * ref count on it because the reference belongs to the
+- * original block. In this case, make the entry look
+- * like it has an empty value.
+- */
+- if (!s->not_found && s->here->e_value_inum) {
+- ea_ino = le32_to_cpu(s->here->e_value_inum);
+- error = ext4_xattr_inode_iget(inode, ea_ino,
+- le32_to_cpu(s->here->e_hash),
+- &tmp_inode);
+- if (error)
+- goto cleanup;
+-
+- if (!ext4_test_inode_state(tmp_inode,
+- EXT4_STATE_LUSTRE_EA_INODE)) {
+- /*
+- * Defer quota free call for previous
+- * inode until success is guaranteed.
+- */
+- old_ea_inode_quota = le32_to_cpu(
+- s->here->e_value_size);
+- }
+- iput(tmp_inode);
+-
+- s->here->e_value_inum = 0;
+- s->here->e_value_size = 0;
++ if (!ext4_test_inode_state(tmp_inode,
++ EXT4_STATE_LUSTRE_EA_INODE)) {
++ /*
++ * Defer quota free call for previous
++ * inode until success is guaranteed.
++ */
++ old_ea_inode_quota = le32_to_cpu(
++ s->here->e_value_size);
+ }
++ iput(tmp_inode);
++
++ s->here->e_value_inum = 0;
++ s->here->e_value_size = 0;
+ }
+ } else {
+ /* Allocate a buffer where we construct the new block. */
+--
+2.35.1
+
--- /dev/null
+From e4ead00e1f53862a44eabf274f6a31a135bf6169 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 May 2022 11:01:20 +0800
+Subject: ext4: use kmemdup() to replace kmalloc + memcpy
+
+From: Shuqi Zhang <zhangshuqi3@huawei.com>
+
+[ Upstream commit 4efd9f0d120c55b08852ee5605dbb02a77089a5d ]
+
+Replace kmalloc + memcpy with kmemdup()
+
+Signed-off-by: Shuqi Zhang <zhangshuqi3@huawei.com>
+Reviewed-by: Ritesh Harjani <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220525030120.803330-1-zhangshuqi3@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 0081eab74b20..8f0e8b60ea20 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1907,11 +1907,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+
+ unlock_buffer(bs->bh);
+ ea_bdebug(bs->bh, "cloning");
+- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
++ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+ error = -ENOMEM;
+ if (s->base == NULL)
+ goto cleanup;
+- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+ s->first = ENTRY(header(s->base)+1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->here = ENTRY(s->base + offset);
+--
+2.35.1
+
--- /dev/null
+From 0b8b519467585e564104f6fdeb80aeba83ac0bc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Feb 2021 11:04:23 -0800
+Subject: ext4: use memcpy_to_page() in pagecache_write()
+
+From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+
+[ Upstream commit bd256fda92efe97b692dc72e246d35fa724d42d8 ]
+
+Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Link: https://lore.kernel.org/r/20210207190425.38107-7-chaitanya.kulkarni@wdc.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/verity.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
+index 6a30e54c1128..0c67b7060eb4 100644
+--- a/fs/ext4/verity.c
++++ b/fs/ext4/verity.c
+@@ -80,7 +80,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *fsdata;
+- void *addr;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+@@ -88,9 +87,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ if (res)
+ return res;
+
+- addr = kmap_atomic(page);
+- memcpy(addr + offset_in_page(pos), buf, n);
+- kunmap_atomic(addr);
++ memcpy_to_page(page, offset_in_page(pos), buf, n);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+--
+2.35.1
+
--- /dev/null
+From 3311e1aa8f8765434003bbf646bd4ddd16c9ead1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 12:21:30 +0100
+Subject: fs: ext4: initialize fsdata in pagecache_write()
+
+From: Alexander Potapenko <glider@google.com>
+
+[ Upstream commit 956510c0c7439e90b8103aaeaf4da92878c622f0 ]
+
+When aops->write_begin() does not initialize fsdata, KMSAN reports
+an error passing the latter to aops->write_end().
+
+Fix this by unconditionally initializing fsdata.
+
+Cc: Eric Biggers <ebiggers@kernel.org>
+Fixes: c93d8f885809 ("ext4: add basic fs-verity support")
+Reported-by: syzbot+9767be679ef5016b6082@syzkaller.appspotmail.com
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20221121112134.407362-1-glider@google.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/verity.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
+index 0c67b7060eb4..9879ea046e5a 100644
+--- a/fs/ext4/verity.c
++++ b/fs/ext4/verity.c
+@@ -79,7 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+- void *fsdata;
++ void *fsdata = NULL;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+--
+2.35.1
+
--- /dev/null
+From de47e33cc38f2766203fc065a3323b800c7d59ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 06:23:03 +0000
+Subject: KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 31de69f4eea77b28a9724b3fa55aae104fc91fc7 ]
+
+Set ENABLE_USR_WAIT_PAUSE in KVM's supported VMX MSR configuration if the
+feature is supported in hardware and enabled in KVM's base, non-nested
+configuration, i.e. expose ENABLE_USR_WAIT_PAUSE to L1 if it's supported.
+This fixes a bug where saving/restoring, i.e. migrating, a vCPU will fail
+if WAITPKG (the associated CPUID feature) is enabled for the vCPU, and
+obviously allows L1 to enable the feature for L2.
+
+KVM already effectively exposes ENABLE_USR_WAIT_PAUSE to L1 by stuffing
+the allowed-1 control ina vCPU's virtual MSR_IA32_VMX_PROCBASED_CTLS2 when
+updating secondary controls in response to KVM_SET_CPUID(2), but (a) that
+depends on flawed code (KVM shouldn't touch VMX MSRs in response to CPUID
+updates) and (b) runs afoul of vmx_restore_control_msr()'s restriction
+that the guest value must be a strict subset of the supported host value.
+
+Although no past commit explicitly enabled nested support for WAITPKG,
+doing so is safe and functionally correct from an architectural
+perspective as no additional KVM support is needed to virtualize TPAUSE,
+UMONITOR, and UMWAIT for L2 relative to L1, and KVM already forwards
+VM-Exits to L1 as necessary (commit bf653b78f960, "KVM: vmx: Introduce
+handle_unexpected_vmexit and handle WAITPKG vmexit").
+
+Note, KVM always keeps the hosts MSR_IA32_UMWAIT_CONTROL resident in
+hardware, i.e. always runs both L1 and L2 with the host's power management
+settings for TPAUSE and UMWAIT. See commit bf09fb6cba4f ("KVM: VMX: Stop
+context switching MSR_IA32_UMWAIT_CONTROL") for more details.
+
+Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions")
+Cc: stable@vger.kernel.org
+Reported-by: Aaron Lewis <aaronlewis@google.com>
+Reported-by: Yu Zhang <yu.c.zhang@linux.intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Message-Id: <20221213062306.667649-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 1dd693d18395..00f3336194a9 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -5916,7 +5916,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ SECONDARY_EXEC_RDRAND_EXITING |
+ SECONDARY_EXEC_ENABLE_INVPCID |
+ SECONDARY_EXEC_RDSEED_EXITING |
+- SECONDARY_EXEC_XSAVES;
++ SECONDARY_EXEC_XSAVES |
++ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
+ /*
+ * We can emulate "VMCS shadowing," even if the hardware
+--
+2.35.1
+
--- /dev/null
+From 3c1597d7ec45ff271a75103e2981fc3b6c58a96d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Nov 2019 17:59:59 -0500
+Subject: KVM: retpolines: x86: eliminate retpoline from vmx.c exit handlers
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+[ Upstream commit 4289d2728664fc1fb49cfc76a6a7d96d913b921f ]
+
+It's enough to check the exit value and issue a direct call to avoid
+the retpoline for all the common vmexit reasons.
+
+Of course CONFIG_RETPOLINE already forbids gcc to use indirect jumps
+while compiling all switch() statements, however switch() would still
+allow the compiler to bisect the case value. It's more efficient to
+prioritize the most frequent vmexits instead.
+
+The halt may be slow paths from the point of the guest, but not
+necessarily so from the point of the host if the host runs at full CPU
+capacity and no host CPU is ever left idle.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 31de69f4eea7 ("KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 0fae9b448ab9..668505c6abe9 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6015,9 +6015,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+ }
+
+ if (exit_reason < kvm_vmx_max_exit_handlers
+- && kvm_vmx_exit_handlers[exit_reason])
++ && kvm_vmx_exit_handlers[exit_reason]) {
++#ifdef CONFIG_RETPOLINE
++ if (exit_reason == EXIT_REASON_MSR_WRITE)
++ return kvm_emulate_wrmsr(vcpu);
++ else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
++ return handle_preemption_timer(vcpu);
++ else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
++ return handle_interrupt_window(vcpu);
++ else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
++ return handle_external_interrupt(vcpu);
++ else if (exit_reason == EXIT_REASON_HLT)
++ return kvm_emulate_halt(vcpu);
++ else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
++ return handle_ept_misconfig(vcpu);
++#endif
+ return kvm_vmx_exit_handlers[exit_reason](vcpu);
+- else {
++ } else {
+ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
+ exit_reason);
+ dump_vmcs();
+--
+2.35.1
+
--- /dev/null
+From 8f9706aac0be8c39909b04db5082921bafb8b19e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2019 16:45:26 +0800
+Subject: KVM: VMX: Fix the spelling of CPU_BASED_USE_TSC_OFFSETTING
+
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+
+[ Upstream commit 5e3d394fdd9e6b49cd8b28d85adff100a5bddc66 ]
+
+The mis-spelling is found by checkpatch.pl, so fix them.
+
+Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 31de69f4eea7 ("KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/vmx.h | 2 +-
+ arch/x86/kvm/vmx/nested.c | 8 ++++----
+ arch/x86/kvm/vmx/vmx.c | 6 +++---
+ tools/testing/selftests/kvm/include/x86_64/vmx.h | 2 +-
+ tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 2 +-
+ 5 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
+index 06d4420508c5..d716fe938fc0 100644
+--- a/arch/x86/include/asm/vmx.h
++++ b/arch/x86/include/asm/vmx.h
+@@ -20,7 +20,7 @@
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+ #define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+-#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
++#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008
+ #define CPU_BASED_HLT_EXITING 0x00000080
+ #define CPU_BASED_INVLPG_EXITING 0x00000200
+ #define CPU_BASED_MWAIT_EXITING 0x00000400
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index dca2c78db5d0..1dd693d18395 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3090,7 +3090,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ }
+
+ enter_guest_mode(vcpu);
+- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
+ vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+
+ if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+@@ -3154,7 +3154,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ * 26.7 "VM-entry failures during or after loading guest state".
+ */
+ vmentry_fail_vmexit_guest_mode:
+- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+ leave_guest_mode(vcpu);
+
+@@ -4073,7 +4073,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+
+- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+
+ if (likely(!vmx->fail)) {
+@@ -5870,7 +5870,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+ msrs->procbased_ctls_high &=
+ CPU_BASED_INTR_WINDOW_EXITING |
+- CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETING |
++ CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
+ CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 470a8f9a0046..df77207d93b0 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -1780,7 +1780,7 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (is_guest_mode(vcpu) &&
+- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
++ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
+ return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+ return vcpu->arch.tsc_offset;
+@@ -1798,7 +1798,7 @@ static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+ * to the newly set TSC to get L2's TSC.
+ */
+ if (is_guest_mode(vcpu) &&
+- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
++ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
+ g_tsc_offset = vmcs12->tsc_offset;
+
+ trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+@@ -2425,7 +2425,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
+ CPU_BASED_CR3_STORE_EXITING |
+ CPU_BASED_UNCOND_IO_EXITING |
+ CPU_BASED_MOV_DR_EXITING |
+- CPU_BASED_USE_TSC_OFFSETING |
++ CPU_BASED_USE_TSC_OFFSETTING |
+ CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING |
+ CPU_BASED_INVLPG_EXITING |
+diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+index 7eb38451c359..3d27069b9ed9 100644
+--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
++++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+@@ -19,7 +19,7 @@
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+ #define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+-#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
++#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008
+ #define CPU_BASED_HLT_EXITING 0x00000080
+ #define CPU_BASED_INVLPG_EXITING 0x00000200
+ #define CPU_BASED_MWAIT_EXITING 0x00000400
+diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+index 5590fd2bcf87..69e482a95c47 100644
+--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
++++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+@@ -98,7 +98,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
++ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+--
+2.35.1
+
--- /dev/null
+From e916ed024c3ded636f11be23596b2f86bfcb8053 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2019 16:45:24 +0800
+Subject: KVM: VMX: Rename INTERRUPT_PENDING to INTERRUPT_WINDOW
+
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+
+[ Upstream commit 9dadc2f918df26e64aa04794cdb4d8667c934f47 ]
+
+Rename interrupt-windown exiting related definitions to match the
+latest Intel SDM. No functional changes.
+
+Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 31de69f4eea7 ("KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/vmx.h | 2 +-
+ arch/x86/include/uapi/asm/vmx.h | 4 ++--
+ arch/x86/kvm/vmx/nested.c | 12 ++++++------
+ arch/x86/kvm/vmx/vmx.c | 10 +++++-----
+ tools/arch/x86/include/uapi/asm/vmx.h | 4 ++--
+ tools/testing/selftests/kvm/include/x86_64/vmx.h | 4 ++--
+ 6 files changed, 18 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
+index 1835767aa335..5acda8d9b9a7 100644
+--- a/arch/x86/include/asm/vmx.h
++++ b/arch/x86/include/asm/vmx.h
+@@ -19,7 +19,7 @@
+ /*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+-#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
++#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+ #define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+ #define CPU_BASED_HLT_EXITING 0x00000080
+ #define CPU_BASED_INVLPG_EXITING 0x00000200
+diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
+index 3eb8411ab60e..e95b72ec19bc 100644
+--- a/arch/x86/include/uapi/asm/vmx.h
++++ b/arch/x86/include/uapi/asm/vmx.h
+@@ -33,7 +33,7 @@
+ #define EXIT_REASON_TRIPLE_FAULT 2
+ #define EXIT_REASON_INIT_SIGNAL 3
+
+-#define EXIT_REASON_PENDING_INTERRUPT 7
++#define EXIT_REASON_INTERRUPT_WINDOW 7
+ #define EXIT_REASON_NMI_WINDOW 8
+ #define EXIT_REASON_TASK_SWITCH 9
+ #define EXIT_REASON_CPUID 10
+@@ -94,7 +94,7 @@
+ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
+ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
+- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
++ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
+ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
+ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
+ { EXIT_REASON_CPUID, "CPUID" }, \
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 6ab0410f1030..ee768f977a0a 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2073,7 +2073,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+ * EXEC CONTROLS
+ */
+ exec_control = vmx_exec_control(vmx); /* L0's desires */
+- exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
++ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
+ exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+ exec_control &= ~CPU_BASED_TPR_SHADOW;
+ exec_control |= vmcs12->cpu_based_vm_exec_control;
+@@ -3039,7 +3039,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ u32 exit_qual;
+
+ evaluate_pending_interrupts = exec_controls_get(vmx) &
+- (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
++ (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_VIRTUAL_NMI_PENDING);
+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
+
+@@ -3268,7 +3268,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
+ !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) &&
+- !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) &&
++ !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
+ (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+ vmx->nested.nested_run_pending = 0;
+ return kvm_vcpu_halt(vcpu);
+@@ -5376,8 +5376,8 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
+ return false;
+ case EXIT_REASON_TRIPLE_FAULT:
+ return true;
+- case EXIT_REASON_PENDING_INTERRUPT:
+- return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
++ case EXIT_REASON_INTERRUPT_WINDOW:
++ return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
+ case EXIT_REASON_NMI_WINDOW:
+ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
+ case EXIT_REASON_TASK_SWITCH:
+@@ -5869,7 +5869,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ msrs->procbased_ctls_low =
+ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+ msrs->procbased_ctls_high &=
+- CPU_BASED_VIRTUAL_INTR_PENDING |
++ CPU_BASED_INTR_WINDOW_EXITING |
+ CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 668505c6abe9..51aa5851011c 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4458,7 +4458,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+
+ static void enable_irq_window(struct kvm_vcpu *vcpu)
+ {
+- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
++ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
+ }
+
+ static void enable_nmi_window(struct kvm_vcpu *vcpu)
+@@ -5082,7 +5082,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
+
+ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
+ {
+- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
++ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
+
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+@@ -5316,7 +5316,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
+ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
+
+ intr_window_requested = exec_controls_get(vmx) &
+- CPU_BASED_VIRTUAL_INTR_PENDING;
++ CPU_BASED_INTR_WINDOW_EXITING;
+
+ while (vmx->emulation_required && count-- != 0) {
+ if (intr_window_requested && vmx_interrupt_allowed(vcpu))
+@@ -5640,7 +5640,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+ [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
+ [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
+ [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
+- [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
++ [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
+ [EXIT_REASON_HLT] = kvm_emulate_halt,
+ [EXIT_REASON_INVD] = handle_invd,
+ [EXIT_REASON_INVLPG] = handle_invlpg,
+@@ -6021,7 +6021,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+ return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+ return handle_preemption_timer(vcpu);
+- else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
++ else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
+ return handle_interrupt_window(vcpu);
+ else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+ return handle_external_interrupt(vcpu);
+diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
+index 3eb8411ab60e..e95b72ec19bc 100644
+--- a/tools/arch/x86/include/uapi/asm/vmx.h
++++ b/tools/arch/x86/include/uapi/asm/vmx.h
+@@ -33,7 +33,7 @@
+ #define EXIT_REASON_TRIPLE_FAULT 2
+ #define EXIT_REASON_INIT_SIGNAL 3
+
+-#define EXIT_REASON_PENDING_INTERRUPT 7
++#define EXIT_REASON_INTERRUPT_WINDOW 7
+ #define EXIT_REASON_NMI_WINDOW 8
+ #define EXIT_REASON_TASK_SWITCH 9
+ #define EXIT_REASON_CPUID 10
+@@ -94,7 +94,7 @@
+ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
+ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
+- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
++ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
+ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
+ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
+ { EXIT_REASON_CPUID, "CPUID" }, \
+diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+index f52e0ba84fed..c6e442d7a241 100644
+--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
++++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+@@ -18,7 +18,7 @@
+ /*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+-#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
++#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004
+ #define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+ #define CPU_BASED_HLT_EXITING 0x00000080
+ #define CPU_BASED_INVLPG_EXITING 0x00000200
+@@ -103,7 +103,7 @@
+ #define EXIT_REASON_EXCEPTION_NMI 0
+ #define EXIT_REASON_EXTERNAL_INTERRUPT 1
+ #define EXIT_REASON_TRIPLE_FAULT 2
+-#define EXIT_REASON_PENDING_INTERRUPT 7
++#define EXIT_REASON_INTERRUPT_WINDOW 7
+ #define EXIT_REASON_NMI_WINDOW 8
+ #define EXIT_REASON_TASK_SWITCH 9
+ #define EXIT_REASON_CPUID 10
+--
+2.35.1
+
--- /dev/null
+From 1049ab38cbf6c1232aac92176a3d4e63f690d93e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2019 16:45:25 +0800
+Subject: KVM: VMX: Rename NMI_PENDING to NMI_WINDOW
+
+From: Xiaoyao Li <xiaoyao.li@intel.com>
+
+[ Upstream commit 4e2a0bc56ad197e5ccfab8395649b681067fe8cb ]
+
+Rename the NMI-window exiting related definitions to match the latest
+Intel SDM. No functional changes.
+
+Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 31de69f4eea7 ("KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/vmx.h | 2 +-
+ arch/x86/kvm/vmx/nested.c | 12 ++++++------
+ arch/x86/kvm/vmx/vmx.c | 4 ++--
+ tools/testing/selftests/kvm/include/x86_64/vmx.h | 2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
+index 5acda8d9b9a7..06d4420508c5 100644
+--- a/arch/x86/include/asm/vmx.h
++++ b/arch/x86/include/asm/vmx.h
+@@ -31,7 +31,7 @@
+ #define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+ #define CPU_BASED_CR8_STORE_EXITING 0x00100000
+ #define CPU_BASED_TPR_SHADOW 0x00200000
+-#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
++#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000
+ #define CPU_BASED_MOV_DR_EXITING 0x00800000
+ #define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+ #define CPU_BASED_USE_IO_BITMAPS 0x02000000
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index ee768f977a0a..dca2c78db5d0 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2074,7 +2074,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+ */
+ exec_control = vmx_exec_control(vmx); /* L0's desires */
+ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
+- exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
++ exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
+ exec_control &= ~CPU_BASED_TPR_SHADOW;
+ exec_control |= vmcs12->cpu_based_vm_exec_control;
+
+@@ -2459,7 +2459,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
+ return -EINVAL;
+
+ if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
+- nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)))
++ nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
+ return -EINVAL;
+
+ return 0;
+@@ -3039,7 +3039,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ u32 exit_qual;
+
+ evaluate_pending_interrupts = exec_controls_get(vmx) &
+- (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_VIRTUAL_NMI_PENDING);
++ (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
+
+@@ -3267,7 +3267,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+ */
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
+- !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) &&
++ !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
+ !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
+ (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+ vmx->nested.nested_run_pending = 0;
+@@ -5379,7 +5379,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
+ case EXIT_REASON_INTERRUPT_WINDOW:
+ return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
+ case EXIT_REASON_NMI_WINDOW:
+- return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
++ return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
+ case EXIT_REASON_TASK_SWITCH:
+ return true;
+ case EXIT_REASON_CPUID:
+@@ -5870,7 +5870,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+ msrs->procbased_ctls_high &=
+ CPU_BASED_INTR_WINDOW_EXITING |
+- CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
++ CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 51aa5851011c..470a8f9a0046 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4469,7 +4469,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
+ return;
+ }
+
+- exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
++ exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
+ }
+
+ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
+@@ -5295,7 +5295,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
+ static int handle_nmi_window(struct kvm_vcpu *vcpu)
+ {
+ WARN_ON_ONCE(!enable_vnmi);
+- exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
++ exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
+ ++vcpu->stat.nmi_window_exits;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+index c6e442d7a241..7eb38451c359 100644
+--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
++++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
+@@ -30,7 +30,7 @@
+ #define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+ #define CPU_BASED_CR8_STORE_EXITING 0x00100000
+ #define CPU_BASED_TPR_SHADOW 0x00200000
+-#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
++#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000
+ #define CPU_BASED_MOV_DR_EXITING 0x00800000
+ #define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+ #define CPU_BASED_USE_IO_BITMAPS 0x02000000
+--
+2.35.1
+
--- /dev/null
+From ee16a936986468f7909687e3530a8912664707e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Nov 2019 17:59:58 -0500
+Subject: KVM: x86: optimize more exit handlers in vmx.c
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+[ Upstream commit f399e60c45f6b6e6ad6dfcedff1dd6386e086b0b ]
+
+Eliminate wasteful call/ret non RETPOLINE case and unnecessary fentry
+dynamic tracing hooking points.
+
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Stable-dep-of: 31de69f4eea7 ("KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 30 +++++-------------------------
+ 1 file changed, 5 insertions(+), 25 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 52f024eeac3d..0fae9b448ab9 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4802,7 +4802,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
+ return 0;
+ }
+
+-static int handle_external_interrupt(struct kvm_vcpu *vcpu)
++static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
+ {
+ ++vcpu->stat.irq_exits;
+ return 1;
+@@ -5074,21 +5074,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
+ vmcs_writel(GUEST_DR7, val);
+ }
+
+-static int handle_cpuid(struct kvm_vcpu *vcpu)
+-{
+- return kvm_emulate_cpuid(vcpu);
+-}
+-
+-static int handle_rdmsr(struct kvm_vcpu *vcpu)
+-{
+- return kvm_emulate_rdmsr(vcpu);
+-}
+-
+-static int handle_wrmsr(struct kvm_vcpu *vcpu)
+-{
+- return kvm_emulate_wrmsr(vcpu);
+-}
+-
+ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
+ {
+ kvm_apic_update_ppr(vcpu);
+@@ -5105,11 +5090,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
+ return 1;
+ }
+
+-static int handle_halt(struct kvm_vcpu *vcpu)
+-{
+- return kvm_emulate_halt(vcpu);
+-}
+-
+ static int handle_vmcall(struct kvm_vcpu *vcpu)
+ {
+ return kvm_emulate_hypercall(vcpu);
+@@ -5657,11 +5637,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+ [EXIT_REASON_IO_INSTRUCTION] = handle_io,
+ [EXIT_REASON_CR_ACCESS] = handle_cr,
+ [EXIT_REASON_DR_ACCESS] = handle_dr,
+- [EXIT_REASON_CPUID] = handle_cpuid,
+- [EXIT_REASON_MSR_READ] = handle_rdmsr,
+- [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
++ [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
++ [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
++ [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
+ [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
+- [EXIT_REASON_HLT] = handle_halt,
++ [EXIT_REASON_HLT] = kvm_emulate_halt,
+ [EXIT_REASON_INVD] = handle_invd,
+ [EXIT_REASON_INVLPG] = handle_invlpg,
+ [EXIT_REASON_RDPMC] = handle_rdpmc,
+--
+2.35.1
+
--- /dev/null
+From 432f1ccf6545771425ca6cca6e0c1914e0b398c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:21 +0200
+Subject: mbcache: add functions to delete entry if unused
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 3dc96bba65f53daa217f0a8f43edad145286a8f5 ]
+
+Add function mb_cache_entry_delete_or_get() to delete mbcache entry if
+it is unused and also add a function to wait for entry to become unused
+- mb_cache_entry_wait_unused(). We do not share code between the two
+deleting function as one of them will go away soon.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c | 66 +++++++++++++++++++++++++++++++++++++++--
+ include/linux/mbcache.h | 10 ++++++-
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index cfc28129fb6f..2010bc80a3f2 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -11,7 +11,7 @@
+ /*
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+- * mb_cache_entry_delete()).
++ * mb_cache_entry_delete_or_get()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * Ext4 also uses it for deduplication of xattr values stored in inodes.
+@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
+
++/*
++ * mb_cache_entry_wait_unused - wait to be the last user of the entry
++ *
++ * @entry - entry to work on
++ *
++ * Wait to be the last user of the entry.
++ */
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
++{
++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
++}
++EXPORT_SYMBOL(mb_cache_entry_wait_unused);
++
+ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ struct mb_cache_entry *entry,
+ u32 key)
+@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ }
+ EXPORT_SYMBOL(mb_cache_entry_get);
+
+-/* mb_cache_entry_delete - remove a cache entry
++/* mb_cache_entry_delete - try to remove a cache entry
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete);
+
++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
++ * @cache - cache we work with
++ * @key - key
++ * @value - value
++ *
++ * Remove entry from cache @cache with key @key and value @value. The removal
++ * happens only if the entry is unused. The function returns NULL in case the
++ * entry was successfully removed or there's no entry in cache. Otherwise the
++ * function grabs reference of the entry that we failed to delete because it
++ * still has users and return it.
++ */
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++ u32 key, u64 value)
++{
++ struct hlist_bl_node *node;
++ struct hlist_bl_head *head;
++ struct mb_cache_entry *entry;
++
++ head = mb_cache_entry_head(cache, key);
++ hlist_bl_lock(head);
++ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
++ if (entry->e_key == key && entry->e_value == value) {
++ if (atomic_read(&entry->e_refcnt) > 2) {
++ atomic_inc(&entry->e_refcnt);
++ hlist_bl_unlock(head);
++ return entry;
++ }
++ /* We keep hash list reference to keep entry alive */
++ hlist_bl_del_init(&entry->e_hash_list);
++ hlist_bl_unlock(head);
++ spin_lock(&cache->c_list_lock);
++ if (!list_empty(&entry->e_list)) {
++ list_del_init(&entry->e_list);
++ if (!WARN_ONCE(cache->c_entry_count == 0,
++ "mbcache: attempt to decrement c_entry_count past zero"))
++ cache->c_entry_count--;
++ atomic_dec(&entry->e_refcnt);
++ }
++ spin_unlock(&cache->c_list_lock);
++ mb_cache_entry_put(cache, entry);
++ return NULL;
++ }
++ }
++ hlist_bl_unlock(head);
++
++ return NULL;
++}
++EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
++
+ /* mb_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 20f1e3ff6013..8eca7f25c432 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ u64 value, bool reusable);
+ void __mb_cache_entry_free(struct mb_cache_entry *entry);
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+ static inline int mb_cache_entry_put(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
+ {
+- if (!atomic_dec_and_test(&entry->e_refcnt))
++ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
++
++ if (cnt > 0) {
++ if (cnt <= 3)
++ wake_up_var(&entry->e_refcnt);
+ return 0;
++ }
+ __mb_cache_entry_free(entry);
+ return 1;
+ }
+
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++ u32 key, u64 value);
+ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
+ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ u64 value);
+--
+2.35.1
+
--- /dev/null
+From c7d9d3671c90cfdcc47f373a2da48a51535960bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:29 +0200
+Subject: mbcache: automatically delete entries from cache on freeing
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 307af6c879377c1c63e71cbdd978201f9c7ee8df ]
+
+Use the fact that entries with elevated refcount are not removed from
+the hash and just move removal of the entry from the hash to the entry
+freeing time. When doing this we also change the generic code to hold
+one reference to the cache entry, not two of them, which makes code
+somewhat more obvious.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-10-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c | 108 +++++++++++++++-------------------------
+ include/linux/mbcache.h | 24 ++++++---
+ 2 files changed, 55 insertions(+), 77 deletions(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 2010bc80a3f2..950f1829a7fd 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_list);
+- /* One ref for hash, one ref returned */
++ /* Initial hash reference */
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_value = value;
+@@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ }
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+- hlist_bl_unlock(head);
+-
++ /*
++ * Add entry to LRU list before it can be found by
++ * mb_cache_entry_delete() to avoid races
++ */
+ spin_lock(&cache->c_list_lock);
+ list_add_tail(&entry->e_list, &cache->c_list);
+- /* Grab ref for LRU list */
+- atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_list_lock);
++ hlist_bl_unlock(head);
+
+ return 0;
+ }
+ EXPORT_SYMBOL(mb_cache_entry_create);
+
+-void __mb_cache_entry_free(struct mb_cache_entry *entry)
++void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
+ {
++ struct hlist_bl_head *head;
++
++ head = mb_cache_entry_head(cache, entry->e_key);
++ hlist_bl_lock(head);
++ hlist_bl_del(&entry->e_hash_list);
++ hlist_bl_unlock(head);
+ kmem_cache_free(mb_entry_cache, entry);
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
+@@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
+ */
+ void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+ {
+- wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+ }
+ EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+
+@@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb_cache_entry,
+ e_hash_list);
+- if (entry->e_key == key && entry->e_reusable) {
+- atomic_inc(&entry->e_refcnt);
++ if (entry->e_key == key && entry->e_reusable &&
++ atomic_inc_not_zero(&entry->e_refcnt))
+ goto out;
+- }
+ node = node->next;
+ }
+ entry = NULL;
+@@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+- if (entry->e_key == key && entry->e_value == value) {
+- atomic_inc(&entry->e_refcnt);
++ if (entry->e_key == key && entry->e_value == value &&
++ atomic_inc_not_zero(&entry->e_refcnt))
+ goto out;
+- }
+ }
+ entry = NULL;
+ out:
+@@ -281,37 +286,25 @@ EXPORT_SYMBOL(mb_cache_entry_delete);
+ struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value)
+ {
+- struct hlist_bl_node *node;
+- struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+- head = mb_cache_entry_head(cache, key);
+- hlist_bl_lock(head);
+- hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+- if (entry->e_key == key && entry->e_value == value) {
+- if (atomic_read(&entry->e_refcnt) > 2) {
+- atomic_inc(&entry->e_refcnt);
+- hlist_bl_unlock(head);
+- return entry;
+- }
+- /* We keep hash list reference to keep entry alive */
+- hlist_bl_del_init(&entry->e_hash_list);
+- hlist_bl_unlock(head);
+- spin_lock(&cache->c_list_lock);
+- if (!list_empty(&entry->e_list)) {
+- list_del_init(&entry->e_list);
+- if (!WARN_ONCE(cache->c_entry_count == 0,
+- "mbcache: attempt to decrement c_entry_count past zero"))
+- cache->c_entry_count--;
+- atomic_dec(&entry->e_refcnt);
+- }
+- spin_unlock(&cache->c_list_lock);
+- mb_cache_entry_put(cache, entry);
+- return NULL;
+- }
+- }
+- hlist_bl_unlock(head);
++ entry = mb_cache_entry_get(cache, key, value);
++ if (!entry)
++ return NULL;
+
++ /*
++ * Drop the ref we got from mb_cache_entry_get() and the initial hash
++ * ref if we are the last user
++ */
++ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
++ return entry;
++
++ spin_lock(&cache->c_list_lock);
++ if (!list_empty(&entry->e_list))
++ list_del_init(&entry->e_list);
++ cache->c_entry_count--;
++ spin_unlock(&cache->c_list_lock);
++ __mb_cache_entry_free(cache, entry);
+ return NULL;
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+@@ -343,42 +336,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned long nr_to_scan)
+ {
+ struct mb_cache_entry *entry;
+- struct hlist_bl_head *head;
+ unsigned long shrunk = 0;
+
+ spin_lock(&cache->c_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
++ /* Drop initial hash reference if there is no user */
++ if (entry->e_referenced ||
++ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+ entry->e_referenced = 0;
+ list_move_tail(&entry->e_list, &cache->c_list);
+ continue;
+ }
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+- /*
+- * We keep LRU list reference so that entry doesn't go away
+- * from under us.
+- */
+ spin_unlock(&cache->c_list_lock);
+- head = mb_cache_entry_head(cache, entry->e_key);
+- hlist_bl_lock(head);
+- /* Now a reliable check if the entry didn't get used... */
+- if (atomic_read(&entry->e_refcnt) > 2) {
+- hlist_bl_unlock(head);
+- spin_lock(&cache->c_list_lock);
+- list_add_tail(&entry->e_list, &cache->c_list);
+- cache->c_entry_count++;
+- continue;
+- }
+- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+- hlist_bl_del_init(&entry->e_hash_list);
+- atomic_dec(&entry->e_refcnt);
+- }
+- hlist_bl_unlock(head);
+- if (mb_cache_entry_put(cache, entry))
+- shrunk++;
++ __mb_cache_entry_free(cache, entry);
++ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_list_lock);
+ }
+@@ -470,11 +445,6 @@ void mb_cache_destroy(struct mb_cache *cache)
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
+- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+- hlist_bl_del_init(&entry->e_hash_list);
+- atomic_dec(&entry->e_refcnt);
+- } else
+- WARN_ON(1);
+ list_del(&entry->e_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb_cache_entry_put(cache, entry);
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 8eca7f25c432..e9d5ece87794 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -13,8 +13,16 @@ struct mb_cache;
+ struct mb_cache_entry {
+ /* List of entries in cache - protected by cache->c_list_lock */
+ struct list_head e_list;
+- /* Hash table list - protected by hash chain bitlock */
++ /*
++ * Hash table list - protected by hash chain bitlock. The entry is
++ * guaranteed to be hashed while e_refcnt > 0.
++ */
+ struct hlist_bl_node e_hash_list;
++ /*
++ * Entry refcount. Once it reaches zero, entry is unhashed and freed.
++ * While refcount > 0, the entry is guaranteed to stay in the hash and
++ * e.g. mb_cache_entry_try_delete() will fail.
++ */
+ atomic_t e_refcnt;
+ /* Key in hash - stable during lifetime of the entry */
+ u32 e_key;
+@@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache);
+
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ u64 value, bool reusable);
+-void __mb_cache_entry_free(struct mb_cache_entry *entry);
++void __mb_cache_entry_free(struct mb_cache *cache,
++ struct mb_cache_entry *entry);
+ void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+-static inline int mb_cache_entry_put(struct mb_cache *cache,
+- struct mb_cache_entry *entry)
++static inline void mb_cache_entry_put(struct mb_cache *cache,
++ struct mb_cache_entry *entry)
+ {
+ unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+
+ if (cnt > 0) {
+- if (cnt <= 3)
++ if (cnt <= 2)
+ wake_up_var(&entry->e_refcnt);
+- return 0;
++ return;
+ }
+- __mb_cache_entry_free(entry);
+- return 1;
++ __mb_cache_entry_free(cache, entry);
+ }
+
+ struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+--
+2.35.1
+
--- /dev/null
+From 3fdd29921b1dd5c928461ad78613944700e56e38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:20 +0200
+Subject: mbcache: don't reclaim used entries
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 58318914186c157477b978b1739dfe2f1b9dc0fe ]
+
+Do not reclaim entries that are currently used by somebody from a
+shrinker. Firstly, these entries are likely useful. Secondly, we will
+need to keep such entries to protect pending increment of xattr block
+refcount.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 97c54d3a2227..cfc28129fb6f 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -288,7 +288,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+- if (entry->e_referenced) {
++ if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
+ entry->e_referenced = 0;
+ list_move_tail(&entry->e_list, &cache->c_list);
+ continue;
+@@ -302,6 +302,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ spin_unlock(&cache->c_list_lock);
+ head = mb_cache_entry_head(cache, entry->e_key);
+ hlist_bl_lock(head);
++ /* Now a reliable check if the entry didn't get used... */
++ if (atomic_read(&entry->e_refcnt) > 2) {
++ hlist_bl_unlock(head);
++ spin_lock(&cache->c_list_lock);
++ list_add_tail(&entry->e_list, &cache->c_list);
++ cache->c_entry_count++;
++ continue;
++ }
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+--
+2.35.1
+
--- /dev/null
+From 6a652141db3eebe1e2ee16009210cb3d4e76d78c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Feb 2021 22:22:14 -0800
+Subject: mm/highmem: Lift memcpy_[to|from]_page to core
+
+From: Ira Weiny <ira.weiny@intel.com>
+
+[ Upstream commit bb90d4bc7b6a536b2e4db45f4763e467c2008251 ]
+
+Working through a conversion to a call kmap_local_page() instead of
+kmap() revealed many places where the pattern kmap/memcpy/kunmap
+occurred.
+
+Eric Biggers, Matthew Wilcox, Christoph Hellwig, Dan Williams, and Al
+Viro all suggested putting this code into helper functions. Al Viro
+further pointed out that these functions already existed in the iov_iter
+code.[1]
+
+Various locations for the lifted functions were considered.
+
+Headers like mm.h or string.h seem ok but don't really portray the
+functionality well. pagemap.h made some sense but is for page cache
+functionality.[2]
+
+Another alternative would be to create a new header for the promoted
+memcpy functions, but it masks the fact that these are designed to copy
+to/from pages using the kernel direct mappings and complicates matters
+with a new header.
+
+Placing these functions in 'highmem.h' is suboptimal especially with the
+changes being proposed in the functionality of kmap. From a caller
+perspective including/using 'highmem.h' implies that the functions
+defined in that header are only required when highmem is in use which is
+increasingly not the case with modern processors. However, highmem.h is
+where all the current functions like this reside (zero_user(),
+clear_highpage(), clear_user_highpage(), copy_user_highpage(), and
+copy_highpage()). So it makes the most sense even though it is
+distasteful for some.[3]
+
+Lift memcpy_to_page() and memcpy_from_page() to pagemap.h.
+
+[1] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/
+ https://lore.kernel.org/lkml/20201013112544.GA5249@infradead.org/
+
+[2] https://lore.kernel.org/lkml/20201208122316.GH7338@casper.infradead.org/
+
+[3] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/#t
+ https://lore.kernel.org/lkml/20201208163814.GN1563847@iweiny-DESK2.sc.intel.com/
+
+Cc: Boris Pismenny <borisp@mellanox.com>
+Cc: Or Gerlitz <gerlitz.or@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Suggested-by: Dan Williams <dan.j.williams@intel.com>
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Suggested-by: Eric Biggers <ebiggers@kernel.org>
+Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/highmem.h | 18 ++++++++++++++++++
+ lib/iov_iter.c | 14 --------------
+ 2 files changed, 18 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/highmem.h b/include/linux/highmem.h
+index ea5cdbd8c2c3..900f224bb640 100644
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -276,4 +276,22 @@ static inline void copy_highpage(struct page *to, struct page *from)
+
+ #endif
+
++static inline void memcpy_from_page(char *to, struct page *page,
++ size_t offset, size_t len)
++{
++ char *from = kmap_atomic(page);
++
++ memcpy(to, from + offset, len);
++ kunmap_atomic(from);
++}
++
++static inline void memcpy_to_page(struct page *page, size_t offset,
++ const char *from, size_t len)
++{
++ char *to = kmap_atomic(page);
++
++ memcpy(to + offset, from, len);
++ kunmap_atomic(to);
++}
++
+ #endif /* _LINUX_HIGHMEM_H */
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 9d3bda3d49fe..5c6a0b8a2adb 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -455,20 +455,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
+ }
+ EXPORT_SYMBOL(iov_iter_init);
+
+-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+-{
+- char *from = kmap_atomic(page);
+- memcpy(to, from + offset, len);
+- kunmap_atomic(from);
+-}
+-
+-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
+-{
+- char *to = kmap_atomic(page);
+- memcpy(to + offset, from, len);
+- kunmap_atomic(to);
+-}
+-
+ static void memzero_page(struct page *page, size_t offset, size_t len)
+ {
+ char *addr = kmap_atomic(page);
+--
+2.35.1
+
--- /dev/null
+From 0c7034ff703597cb436ff4afd9c3b0037ed5837e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 16:14:47 +0800
+Subject: net: amd-xgbe: add missed tasklet_kill
+
+From: Jiguang Xiao <jiguang.xiao@windriver.com>
+
+[ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ]
+
+The driver does not call tasklet_kill in several places.
+Add the calls to fix it.
+
+Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared")
+Signed-off-by: Jiguang Xiao <jiguang.xiao@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 +++
+ drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 4 +++-
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++-
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+index 0442d7e1cd20..7f705483c1c5 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1139,6 +1139,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
+
+ devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+
++ tasklet_kill(&pdata->tasklet_dev);
++ tasklet_kill(&pdata->tasklet_ecc);
++
+ if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+ devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+index 4d9062d35930..530043742a07 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+ xgbe_i2c_disable(pdata);
+ xgbe_i2c_clear_all_interrupts(pdata);
+
+- if (pdata->dev_irq != pdata->i2c_irq)
++ if (pdata->dev_irq != pdata->i2c_irq) {
+ devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
++ tasklet_kill(&pdata->tasklet_i2c);
++ }
+ }
+
+ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+index 156a0bc8ab01..97167fc9bebe 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+ /* Disable auto-negotiation */
+ xgbe_an_disable_all(pdata);
+
+- if (pdata->dev_irq != pdata->an_irq)
++ if (pdata->dev_irq != pdata->an_irq) {
+ devm_free_irq(pdata->dev, pdata->an_irq, pdata);
++ tasklet_kill(&pdata->tasklet_an);
++ }
+
+ pdata->phy_if.phy_impl.stop(pdata);
+
+--
+2.35.1
+
--- /dev/null
+From d80916643dac2b2d3ebce525117cb05e3fde4a01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:43:41 +0800
+Subject: net: hns3: add interrupts re-initialization while doing VF FLR
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ]
+
+Currently keep alive message between PF and VF may be lost and the VF is
+unalive in PF. So the VF will not do reset during PF FLR reset process.
+This would make the allocated interrupt resources of VF invalid and VF
+would't receive or respond to PF any more.
+
+So this patch adds VF interrupts re-initialization during VF FLR for VF
+recovery in above cases.
+
+Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index 403c1b9cf6ab..48956c30d2ee 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -2592,7 +2592,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
+ struct pci_dev *pdev = hdev->pdev;
+ int ret = 0;
+
+- if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
++ if ((hdev->reset_type == HNAE3_VF_FULL_RESET ||
++ hdev->reset_type == HNAE3_FLR_RESET) &&
+ test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+ hclgevf_misc_irq_uninit(hdev);
+ hclgevf_uninit_msi(hdev);
+--
+2.35.1
+
--- /dev/null
+From b3cec34c25dd6e26eb88b121ad678d0c1df2bd42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 10:29:25 +0400
+Subject: net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ]
+
+of_phy_find_device() return device node with refcount incremented.
+Call put_device() to relese it when not needed anymore.
+
+Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/xilinx_gmii2rgmii.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
+index 151c2a3f0b3a..7a78dfdfa5bd 100644
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -82,6 +82,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+
+ if (!priv->phy_dev->drv) {
+ dev_info(dev, "Attached phy not ready\n");
++ put_device(&priv->phy_dev->mdio.dev);
+ return -EPROBE_DEFER;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 5e60452adf97e5eb139296d126fcbcf75c0eaed4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:43 -0500
+Subject: net: sched: atm: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume
+res.class contains a valid pointer
+Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent")
+
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_atm.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 6385995dc700..34dd0434d99d 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -396,10 +396,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ result = tcf_classify(skb, fl, &res, true);
+ if (result < 0)
+ continue;
++ if (result == TC_ACT_SHOT)
++ goto done;
++
+ flow = (struct atm_flow_data *)res.class;
+ if (!flow)
+ flow = lookup_flow(sch, res.classid);
+- goto done;
++ goto drop;
+ }
+ }
+ flow = NULL;
+--
+2.35.1
+
--- /dev/null
+From 81d14728d7f39dace568263c6a7647b42c6658d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:44 -0500
+Subject: net: sched: cbq: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that
+res.class contains a valid pointer
+
+Sample splat reported by Kyle Zeng
+
+[ 5.405624] 0: reclassify loop, rule prio 0, protocol 800
+[ 5.406326] ==================================================================
+[ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0
+[ 5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299
+[ 5.408731]
+[ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15
+[ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
+BIOS 1.15.0-1 04/01/2014
+[ 5.410439] Call Trace:
+[ 5.410764] dump_stack+0x87/0xcd
+[ 5.411153] print_address_description+0x7a/0x6b0
+[ 5.411687] ? vprintk_func+0xb9/0xc0
+[ 5.411905] ? printk+0x76/0x96
+[ 5.412110] ? cbq_enqueue+0x54b/0xea0
+[ 5.412323] kasan_report+0x17d/0x220
+[ 5.412591] ? cbq_enqueue+0x54b/0xea0
+[ 5.412803] __asan_report_load1_noabort+0x10/0x20
+[ 5.413119] cbq_enqueue+0x54b/0xea0
+[ 5.413400] ? __kasan_check_write+0x10/0x20
+[ 5.413679] __dev_queue_xmit+0x9c0/0x1db0
+[ 5.413922] dev_queue_xmit+0xc/0x10
+[ 5.414136] ip_finish_output2+0x8bc/0xcd0
+[ 5.414436] __ip_finish_output+0x472/0x7a0
+[ 5.414692] ip_finish_output+0x5c/0x190
+[ 5.414940] ip_output+0x2d8/0x3c0
+[ 5.415150] ? ip_mc_finish_output+0x320/0x320
+[ 5.415429] __ip_queue_xmit+0x753/0x1760
+[ 5.415664] ip_queue_xmit+0x47/0x60
+[ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0
+[ 5.416129] tcp_connect+0x1f5e/0x4cb0
+[ 5.416347] tcp_v4_connect+0xc8d/0x18c0
+[ 5.416577] __inet_stream_connect+0x1ae/0xb40
+[ 5.416836] ? local_bh_enable+0x11/0x20
+[ 5.417066] ? lock_sock_nested+0x175/0x1d0
+[ 5.417309] inet_stream_connect+0x5d/0x90
+[ 5.417548] ? __inet_stream_connect+0xb40/0xb40
+[ 5.417817] __sys_connect+0x260/0x2b0
+[ 5.418037] __x64_sys_connect+0x76/0x80
+[ 5.418267] do_syscall_64+0x31/0x50
+[ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[ 5.418770] RIP: 0033:0x473bb7
+[ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00
+00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00
+00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34
+24 89
+[ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX:
+000000000000002a
+[ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7
+[ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007
+[ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004
+[ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
+[ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002
+[ 5.422471]
+[ 5.422562] Allocated by task 299:
+[ 5.422782] __kasan_kmalloc+0x12d/0x160
+[ 5.423007] kasan_kmalloc+0x5/0x10
+[ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0
+[ 5.423492] tcf_proto_create+0x65/0x290
+[ 5.423721] tc_new_tfilter+0x137e/0x1830
+[ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0
+[ 5.424197] netlink_rcv_skb+0x166/0x300
+[ 5.424428] rtnetlink_rcv+0x11/0x20
+[ 5.424639] netlink_unicast+0x673/0x860
+[ 5.424870] netlink_sendmsg+0x6af/0x9f0
+[ 5.425100] __sys_sendto+0x58d/0x5a0
+[ 5.425315] __x64_sys_sendto+0xda/0xf0
+[ 5.425539] do_syscall_64+0x31/0x50
+[ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[ 5.426065]
+[ 5.426157] The buggy address belongs to the object at ffff88800e312200
+[ 5.426157] which belongs to the cache kmalloc-128 of size 128
+[ 5.426955] The buggy address is located 42 bytes to the right of
+[ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280)
+[ 5.427688] The buggy address belongs to the page:
+[ 5.427992] page:000000009875fabc refcount:1 mapcount:0
+mapping:0000000000000000 index:0x0 pfn:0xe312
+[ 5.428562] flags: 0x100000000000200(slab)
+[ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122
+ffff888007843680
+[ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff
+ffff88800e312401
+[ 5.429875] page dumped because: kasan: bad access detected
+[ 5.430214] page->mem_cgroup:ffff88800e312401
+[ 5.430471]
+[ 5.430564] Memory state around the buggy address:
+[ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[ 5.432123] ^
+[ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[ 5.433229] ==================================================================
+[ 5.433648] Disabling lock debugging due to kernel taint
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Kyle Zeng <zengyhkyle@gmail.com>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cbq.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index e5972889cd81..12893dac8461 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+ result = tcf_classify(skb, fl, &res, true);
+ if (!fl || result < 0)
+ goto fallback;
++ if (result == TC_ACT_SHOT)
++ return NULL;
+
+ cl = (void *)res.class;
+ if (!cl) {
+@@ -251,8 +253,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+ case TC_ACT_TRAP:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
+- case TC_ACT_SHOT:
+- return NULL;
++ fallthrough;
+ case TC_ACT_RECLASSIFY:
+ return cbq_reclassify(skb, cl);
+ }
+--
+2.35.1
+
--- /dev/null
+From a37a00057e8cbf9cb16e562e9209f19202401081 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 11:51:19 +0800
+Subject: net: sched: fix memory leak in tcindex_set_parms
+
+From: Hawkins Jiawei <yin31149@gmail.com>
+
+[ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ]
+
+Syzkaller reports a memory leak as follows:
+====================================
+BUG: memory leak
+unreferenced object 0xffff88810c287f00 (size 256):
+ comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<ffffffff814cf9f0>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
+ [<ffffffff839c9e07>] kmalloc include/linux/slab.h:576 [inline]
+ [<ffffffff839c9e07>] kmalloc_array include/linux/slab.h:627 [inline]
+ [<ffffffff839c9e07>] kcalloc include/linux/slab.h:659 [inline]
+ [<ffffffff839c9e07>] tcf_exts_init include/net/pkt_cls.h:250 [inline]
+ [<ffffffff839c9e07>] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342
+ [<ffffffff839caa1f>] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553
+ [<ffffffff8394db62>] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147
+ [<ffffffff8389e91c>] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082
+ [<ffffffff839eba67>] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540
+ [<ffffffff839eab87>] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ [<ffffffff839eab87>] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345
+ [<ffffffff839eb046>] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921
+ [<ffffffff8383e796>] sock_sendmsg_nosec net/socket.c:714 [inline]
+ [<ffffffff8383e796>] sock_sendmsg+0x56/0x80 net/socket.c:734
+ [<ffffffff8383eb08>] ____sys_sendmsg+0x178/0x410 net/socket.c:2482
+ [<ffffffff83843678>] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536
+ [<ffffffff838439c5>] __sys_sendmmsg+0x105/0x330 net/socket.c:2622
+ [<ffffffff83843c14>] __do_sys_sendmmsg net/socket.c:2651 [inline]
+ [<ffffffff83843c14>] __se_sys_sendmmsg net/socket.c:2648 [inline]
+ [<ffffffff83843c14>] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648
+ [<ffffffff84605fd5>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ [<ffffffff84605fd5>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ [<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+====================================
+
+Kernel uses tcindex_change() to change an existing
+filter properties.
+
+Yet the problem is that, during the process of changing,
+if `old_r` is retrieved from `p->perfect`, then
+kernel uses tcindex_alloc_perfect_hash() to newly
+allocate filter results, uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure, which triggers the above memory leak.
+
+To be more specific, there are only two source for the `old_r`,
+according to the tcindex_lookup(). `old_r` is retrieved from
+`p->perfect`, or `old_r` is retrieved from `p->h`.
+
+ * If `old_r` is retrieved from `p->perfect`, kernel uses
+tcindex_alloc_perfect_hash() to newly allocate the
+filter results. Then `r` is assigned with `cp->perfect + handle`,
+which is newly allocated. So condition `old_r && old_r != r` is
+true in this situation, and kernel uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure
+
+ * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL
+according to the tcindex_lookup(). Considering that `cp->h`
+is directly copied from `p->h` and `p->perfect` is NULL,
+`r` is assigned with `tcindex_lookup(cp, handle)`, whose value
+should be the same as `old_r`, so condition `old_r && old_r != r`
+is false in this situation, kernel ignores using
+tcindex_filter_result_init() to clear the old filter result.
+
+So only when `old_r` is retrieved from `p->perfect` does kernel use
+tcindex_filter_result_init() to clear the old filter result, which
+triggers the above memory leak.
+
+Considering that there already exists a tc_filter_wq workqueue
+to destroy the old tcindex_data by tcindex_partial_destroy_work()
+at the end of tcindex_set_parms(), this patch solves
+this memory leak bug by removing this old filter result
+clearing part and delegating it to the tc_filter_wq workqueue.
+
+Note that this patch doesn't introduce any other issues. If
+`old_r` is retrieved from `p->perfect`, this patch just
+delegates old filter result clearing part to the
+tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`,
+kernel doesn't reach the old filter result clearing part, so
+removing this part has no effect.
+
+[Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni
+and Dmitry Vyukov]
+
+Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()")
+Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/
+Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Cc: Cong Wang <cong.wang@bytedance.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_tcindex.c | 12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
+index 684187a1fdb9..768cf7cf65b4 100644
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+ struct tcindex_filter_result *r, struct nlattr **tb,
+ struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+ {
+- struct tcindex_filter_result new_filter_result, *old_r = r;
++ struct tcindex_filter_result new_filter_result;
+ struct tcindex_data *cp = NULL, *oldp;
+ struct tcindex_filter *f = NULL; /* make gcc behave */
+ struct tcf_result cr = {};
+@@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+ err = tcindex_filter_result_init(&new_filter_result, cp, net);
+ if (err < 0)
+ goto errout_alloc;
+- if (old_r)
++ if (r)
+ cr = r->res;
+
+ err = -EBUSY;
+@@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+ tcf_bind_filter(tp, &cr, base);
+ }
+
+- if (old_r && old_r != r) {
+- err = tcindex_filter_result_init(old_r, cp, net);
+- if (err < 0) {
+- kfree(f);
+- goto errout_alloc;
+- }
+- }
+-
+ oldp = p;
+ r->res = cr;
+ tcf_exts_change(&r->exts, &e);
+--
+2.35.1
+
--- /dev/null
+From 7556ba6b14b5120103548a4fbbe05f63e3287b06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 11:37:18 +0400
+Subject: nfc: Fix potential resource leaks
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ]
+
+nfc_get_device() take reference for the device, add missing
+nfc_put_device() to release it when not need anymore.
+Also fix the style warnning by use error EOPNOTSUPP instead of
+ENOTSUPP.
+
+Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation")
+Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 38 insertions(+), 14 deletions(-)
+
+diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
+index 9e94f732e717..b53d5eb86864 100644
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -1505,6 +1505,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+ u32 dev_idx, se_idx;
+ u8 *apdu;
+ size_t apdu_len;
++ int rc;
+
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_SE_INDEX] ||
+@@ -1518,25 +1519,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+ if (!dev)
+ return -ENODEV;
+
+- if (!dev->ops || !dev->ops->se_io)
+- return -ENOTSUPP;
++ if (!dev->ops || !dev->ops->se_io) {
++ rc = -EOPNOTSUPP;
++ goto put_dev;
++ }
+
+ apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
+- if (apdu_len == 0)
+- return -EINVAL;
++ if (apdu_len == 0) {
++ rc = -EINVAL;
++ goto put_dev;
++ }
+
+ apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
+- if (!apdu)
+- return -EINVAL;
++ if (!apdu) {
++ rc = -EINVAL;
++ goto put_dev;
++ }
+
+ ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
+- if (!ctx)
+- return -ENOMEM;
++ if (!ctx) {
++ rc = -ENOMEM;
++ goto put_dev;
++ }
+
+ ctx->dev_idx = dev_idx;
+ ctx->se_idx = se_idx;
+
+- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++
++put_dev:
++ nfc_put_device(dev);
++ return rc;
+ }
+
+ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+@@ -1559,14 +1572,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+ subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+
+ dev = nfc_get_device(dev_idx);
+- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
++ if (!dev)
+ return -ENODEV;
+
++ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
++ err = -ENODEV;
++ goto put_dev;
++ }
++
+ if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
+ data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+ data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+- if (data_len == 0)
+- return -EINVAL;
++ if (data_len == 0) {
++ err = -EINVAL;
++ goto put_dev;
++ }
+ } else {
+ data = NULL;
+ data_len = 0;
+@@ -1581,10 +1601,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+ dev->cur_cmd_info = info;
+ err = cmd->doit(dev, data, data_len);
+ dev->cur_cmd_info = NULL;
+- return err;
++ goto put_dev;
+ }
+
+- return -EOPNOTSUPP;
++ err = -EOPNOTSUPP;
++
++put_dev:
++ nfc_put_device(dev);
++ return err;
+ }
+
+ /* message building helper */
+--
+2.35.1
+
--- /dev/null
+From 3e807fb175909014c5f772ceaa325526fb72da24 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 09:51:30 -0500
+Subject: nfsd: shut down the NFSv4 state objects before the filecache
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ]
+
+Currently, we shut down the filecache before trying to clean up the
+stateids that depend on it. This leads to the kernel trying to free an
+nfsd_file twice, and a refcount overput on the nf_mark.
+
+Change the shutdown procedure to tear down all of the stateids prior
+to shutting down the filecache.
+
+Reported-and-tested-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index d63cdda1782d..70684c7ae94b 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -420,8 +420,8 @@ static void nfsd_shutdown_net(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- nfsd_file_cache_shutdown_net(net);
+ nfs4_state_shutdown_net(net);
++ nfsd_file_cache_shutdown_net(net);
+ if (nn->lockd_up) {
+ lockd_down(net);
+ nn->lockd_up = 0;
+--
+2.35.1
+
--- /dev/null
+From 83600433d190f921a3f5c26d1854ef825496ee05 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 12:01:14 +0900
+Subject: perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as
+ unsinged data
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ]
+
+DWARF version 5 standard Sec 2.14 says that
+
+ Any debugging information entry representing the declaration of an object,
+ module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and
+ DW_AT_decl_column attributes, each of whose value is an unsigned integer
+ constant.
+
+So it should be an unsigned integer data. Also, even though the standard
+doesn't clearly say the DW_AT_call_file is signed or unsigned, the
+elfutils (eu-readelf) interprets it as unsigned integer data and it is
+natural to handle it as unsigned integer data as same as DW_AT_decl_file.
+This changes the DW_AT_call_file as unsigned integer data too.
+
+Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 21 ++++-----------------
+ 1 file changed, 4 insertions(+), 17 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index b51e0ba363b2..f1e2f566ce6f 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -261,19 +261,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ return 0;
+ }
+
+-/* Get attribute and translate it as a sdata */
+-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+- Dwarf_Sword *result)
+-{
+- Dwarf_Attribute attr;
+-
+- if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+- dwarf_formsdata(&attr, result) != 0)
+- return -ENOENT;
+-
+- return 0;
+-}
+-
+ /**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+@@ -397,9 +384,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+ /* Get the call file index number in CU DIE */
+ static int die_get_call_fileno(Dwarf_Die *in_die)
+ {
+- Dwarf_Sword idx;
++ Dwarf_Word idx;
+
+- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
++ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+@@ -408,9 +395,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
+ /* Get the declared file index number in CU DIE */
+ static int die_get_decl_fileno(Dwarf_Die *pdie)
+ {
+- Dwarf_Sword idx;
++ Dwarf_Word idx;
+
+- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
++ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+--
+2.35.1
+
--- /dev/null
+From 9aa5a2b138cf59fd838a7665d93f41ca4a5d2f39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 22:48:39 +0900
+Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ]
+
+Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute
+acccessor functions, so that it can find the specified attribute from
+abstact origin DIE etc.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index ab34ef2c661f..b51e0ba363b2 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -254,7 +254,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+ Dwarf_Attribute attr;
+
+- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+@@ -267,7 +267,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+ Dwarf_Attribute attr;
+
+- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formsdata(&attr, result) != 0)
+ return -ENOENT;
+
+--
+2.35.1
+
--- /dev/null
+From 49eb6ec88a10c5179dd366ae1dddf387b4ca92ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 13:09:00 +0400
+Subject: perf tools: Fix resources leak in perf_data__open_dir()
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ]
+
+In perf_data__open_dir(), opendir() opens the directory stream. Add
+missing closedir() to release it after use.
+
+Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function")
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/data.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
+index 3c874f52f1a2..4da900bdb2f1 100644
+--- a/tools/perf/util/data.c
++++ b/tools/perf/util/data.c
+@@ -120,6 +120,7 @@ int perf_data__open_dir(struct perf_data *data)
+ file->size = st.st_size;
+ }
+
++ closedir(dir);
+ if (!files)
+ return -EINVAL;
+
+@@ -128,6 +129,7 @@ int perf_data__open_dir(struct perf_data *data)
+ return 0;
+
+ out_err:
++ closedir(dir);
+ close_dir(files, nr);
+ return ret;
+ }
+--
+2.35.1
+
--- /dev/null
+From 448169ea8aa8e1dad9c045d811ce88ba3c3bcbfa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:52:28 +0300
+Subject: qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ]
+
+adapter->dcb would get silently freed inside qlcnic_dcb_enable() in
+case qlcnic_dcb_attach() would return an error, which always happens
+under OOM conditions. This would lead to use-after-free because both
+of the existing callers invoke qlcnic_dcb_get_info() on the obtained
+pointer, which is potentially freed at that point.
+
+Propagate errors from qlcnic_dcb_enable(), and instead free the dcb
+pointer at callsite using qlcnic_dcb_free(). This also removes the now
+unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around
+kfree() also causing memory leaks for partially initialized dcb.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 8 +++++++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h | 10 ++--------
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++++++-
+ 3 files changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+index 10286215092f..85419b8258b5 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+@@ -2525,7 +2525,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac)
+ goto disable_mbx_intr;
+
+ qlcnic_83xx_clear_function_resources(adapter);
+- qlcnic_dcb_enable(adapter->dcb);
++
++ err = qlcnic_dcb_enable(adapter->dcb);
++ if (err) {
++ qlcnic_dcb_free(adapter->dcb);
++ goto disable_mbx_intr;
++ }
++
+ qlcnic_83xx_initialize_nic(adapter, 1);
+ qlcnic_dcb_get_info(adapter->dcb);
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+index 0a9d24e86715..eb8000d9b6d0 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+@@ -42,11 +42,6 @@ struct qlcnic_dcb {
+ unsigned long state;
+ };
+
+-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb)
+-{
+- kfree(dcb);
+-}
+-
+ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb)
+ {
+ if (dcb && dcb->ops->get_hw_capability)
+@@ -113,9 +108,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb)
+ dcb->ops->init_dcbnl_ops(dcb);
+ }
+
+-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
+ {
+- if (dcb && qlcnic_dcb_attach(dcb))
+- qlcnic_clear_dcb_ops(dcb);
++ return dcb ? qlcnic_dcb_attach(dcb) : 0;
+ }
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 3a96fd6deef7..9d5b74c804b5 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2639,7 +2639,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ "Device does not support MSI interrupts\n");
+
+ if (qlcnic_82xx_check(adapter)) {
+- qlcnic_dcb_enable(adapter->dcb);
++ err = qlcnic_dcb_enable(adapter->dcb);
++ if (err) {
++ qlcnic_dcb_free(adapter->dcb);
++ dev_err(&pdev->dev, "Failed to enable DCB\n");
++ goto err_out_free_hw;
++ }
++
+ qlcnic_dcb_get_info(adapter->dcb);
+ err = qlcnic_setup_intr(adapter);
+
+--
+2.35.1
+
--- /dev/null
+From 21f1ad85a9d28639bf41f0013df8e71652969f57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 10:51:18 +0000
+Subject: ravb: Fix "failed to switch device to config mode" message during
+ unbind
+
+From: Biju Das <biju.das.jz@bp.renesas.com>
+
+[ Upstream commit c72a7e42592b2e18d862cf120876070947000d7a ]
+
+This patch fixes the error "ravb 11c20000.ethernet eth0: failed to switch
+device to config mode" during unbind.
+
+We are doing register access after pm_runtime_put_sync().
+
+We usually do cleanup in reverse order of init. Currently in
+remove(), the "pm_runtime_put_sync" is not in reverse order.
+
+Probe
+ reset_control_deassert(rstc);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_sync(&pdev->dev);
+
+remove
+ pm_runtime_put_sync(&pdev->dev);
+ unregister_netdev(ndev);
+ ..
+ ravb_mdio_release(priv);
+ pm_runtime_disable(&pdev->dev);
+
+Consider the call to unregister_netdev()
+unregister_netdev->unregister_netdevice_queue->rollback_registered_many
+that calls the below functions which access the registers after
+pm_runtime_put_sync()
+ 1) ravb_get_stats
+ 2) ravb_close
+
+Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
+Cc: stable@vger.kernel.org
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20221214105118.2495313-1-biju.das.jz@bp.renesas.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
+index 95fd1f2d5439..3fd5155bdd5f 100644
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -2216,11 +2216,11 @@ static int ravb_remove(struct platform_device *pdev)
+ priv->desc_bat_dma);
+ /* Set reset mode */
+ ravb_write(ndev, CCC_OPC_RESET, CCC);
+- pm_runtime_put_sync(&pdev->dev);
+ unregister_netdev(ndev);
+ netif_napi_del(&priv->napi[RAVB_NC]);
+ netif_napi_del(&priv->napi[RAVB_BE]);
+ ravb_mdio_release(priv);
++ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ free_netdev(ndev);
+ platform_set_drvdata(pdev, NULL);
+--
+2.35.1
+
--- /dev/null
+From 18b1fdf952b0e3cb095182f80a241e69bd4d9773 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 14:56:10 +0200
+Subject: RDMA/mlx5: Fix validation of max_rd_atomic caps for DC
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ]
+
+Currently, when modifying DC, we validate max_rd_atomic user attribute
+against the RC cap, validate against DC. RC and DC QP types have different
+device limitations.
+
+This can cause userspace created DC QPs to malfunction.
+
+Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP")
+Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++----------
+ 1 file changed, 35 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 634f29cb7395..6edd30c92156 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -3890,6 +3890,40 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ return err;
+ }
+
++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
++ int attr_mask, enum ib_qp_type qp_type)
++{
++ int log_max_ra_res;
++ int log_max_ra_req;
++
++ if (qp_type == MLX5_IB_QPT_DCI) {
++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_res_dc);
++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_req_dc);
++ } else {
++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_res_qp);
++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++ log_max_ra_req_qp);
++ }
++
++ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
++ attr->max_rd_atomic > log_max_ra_res) {
++ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
++ attr->max_rd_atomic);
++ return false;
++ }
++
++ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
++ attr->max_dest_rd_atomic > log_max_ra_req) {
++ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
++ attr->max_dest_rd_atomic);
++ return false;
++ }
++ return true;
++}
++
+ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+ {
+@@ -3986,21 +4020,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ }
+ }
+
+- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+- attr->max_rd_atomic >
+- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+- attr->max_rd_atomic);
+- goto out;
+- }
+-
+- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+- attr->max_dest_rd_atomic >
+- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+- attr->max_dest_rd_atomic);
++ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
+ goto out;
+- }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+--
+2.35.1
+
--- /dev/null
+From 7015fff14a0ab737684dc4af1e6793aafbbf8a29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Jul 2020 20:56:27 +0300
+Subject: RDMA/uverbs: Silence shiftTooManyBitsSigned warning
+
+From: Leon Romanovsky <leonro@mellanox.com>
+
+[ Upstream commit 9b8d846924856570625b93f83ae0624391193bce ]
+
+Fix reported by kbuild warning.
+
+ drivers/infiniband/core/uverbs_cmd.c:1897:47: warning: Shifting signed 32-bit value by 31 bits is undefined behaviour [shiftTooManyBitsSigned]
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+ ^
+Link: https://lore.kernel.org/r/20200720175627.1273096-3-leon@kernel.org
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Stable-dep-of: 8de8482fe573 ("RDMA/mlx5: Fix validation of max_rd_atomic caps for DC")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/core/uverbs_cmd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
+index d413dafb9211..39cbb853f913 100644
+--- a/drivers/infiniband/core/uverbs_cmd.c
++++ b/drivers/infiniband/core/uverbs_cmd.c
+@@ -1952,7 +1952,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
+ * Last bit is reserved for extending the attr_mask by
+ * using another field.
+ */
+- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
++ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31));
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+--
+2.35.1
+
--- /dev/null
+From e92954d051587a06c852db0cb6f3173e9f9f651e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jan 2021 20:40:14 +0800
+Subject: riscv/stacktrace: Fix stack output without ra on the stack top
+
+From: Chen Huang <chenhuang5@huawei.com>
+
+[ Upstream commit f766f77a74f5784d8d4d3c36b1900731f97d08d0 ]
+
+When a function doesn't have a callee, then it will not
+push ra into the stack, such as lkdtm_BUG() function,
+
+addi sp,sp,-16
+sd s0,8(sp)
+addi s0,sp,16
+ebreak
+
+The struct stackframe use {fp,ra} to get information from
+stack, if walk_stackframe() with pr_regs, we will obtain
+wrong value and bad stacktrace,
+
+[<ffffffe00066c56c>] lkdtm_BUG+0x6/0x8
+---[ end trace 18da3fbdf08e25d5 ]---
+
+Correct the next fp and pc, after that, full stacktrace
+shown as expects,
+
+[<ffffffe00066c56c>] lkdtm_BUG+0x6/0x8
+[<ffffffe0008b24a4>] lkdtm_do_action+0x14/0x1c
+[<ffffffe00066c372>] direct_entry+0xc0/0x10a
+[<ffffffe000439f86>] full_proxy_write+0x42/0x6a
+[<ffffffe000309626>] vfs_write+0x7e/0x214
+[<ffffffe00030992a>] ksys_write+0x98/0xc0
+[<ffffffe000309960>] sys_write+0xe/0x16
+[<ffffffe0002014bc>] ret_from_syscall+0x0/0x2
+---[ end trace 61917f3d9a9fadcd ]---
+
+Signed-off-by: Chen Huang <chenhuang5@huawei.com>
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Stable-dep-of: 5c3022e4a616 ("riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 19e46f4160cc..1a512a24879e 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -55,9 +55,15 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ /* Unwind stack frame */
+ frame = (struct stackframe *)fp - 1;
+ sp = fp;
+- fp = frame->fp;
+- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+- (unsigned long *)(fp - 8));
++ if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
++ fp = frame->ra;
++ pc = regs->ra;
++ } else {
++ fp = frame->fp;
++ pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
++ (unsigned long *)(fp - 8));
++ }
++
+ }
+ }
+
+--
+2.35.1
+
--- /dev/null
+From eacfe5acf0bf82c6079fc7250a2c8731b481e7a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 01:49:36 -0500
+Subject: riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 5c3022e4a616d800cf5f4c3a981d7992179e44a1 ]
+
+The 'retp' is a pointer to the return address on the stack, so we
+must pass the current return address pointer as the 'retp'
+argument to ftrace_push_return_trace(). Not parent function's
+return address on the stack.
+
+Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support")
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 1a512a24879e..a1ee7f33c205 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -61,7 +61,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+ } else {
+ fp = frame->fp;
+ pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+- (unsigned long *)(fp - 8));
++ &frame->ra);
+ }
+
+ }
+--
+2.35.1
+
media-s5p-mfc-fix-to-handle-reference-queue-during-f.patch
media-s5p-mfc-clear-workbit-to-handle-error-conditio.patch
media-s5p-mfc-fix-in-register-read-and-write-for-h26.patch
+dm-thin-resume-even-if-in-fail-mode.patch
+perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch
+perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch
+kvm-x86-optimize-more-exit-handlers-in-vmx.c.patch
+kvm-retpolines-x86-eliminate-retpoline-from-vmx.c-ex.patch
+kvm-vmx-rename-interrupt_pending-to-interrupt_window.patch
+kvm-vmx-rename-nmi_pending-to-nmi_window.patch
+kvm-vmx-fix-the-spelling-of-cpu_based_use_tsc_offset.patch
+kvm-nvmx-properly-expose-enable_usr_wait_pause-contr.patch
+ravb-fix-failed-to-switch-device-to-config-mode-mess.patch
+riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch
+riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch
+driver-core-fix-driver_deferred_probe_check_state-lo.patch
+driver-core-set-deferred_probe_timeout-to-a-longer-d.patch
+ext4-goto-right-label-failed_mount3a.patch
+ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch
+mm-highmem-lift-memcpy_-to-from-_page-to-core.patch
+ext4-use-memcpy_to_page-in-pagecache_write.patch
+fs-ext4-initialize-fsdata-in-pagecache_write.patch
+ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch
+mbcache-don-t-reclaim-used-entries.patch
+mbcache-add-functions-to-delete-entry-if-unused.patch
+ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
+ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
+ext4-fix-race-when-reusing-xattr-blocks.patch
+mbcache-automatically-delete-entries-from-cache-on-f.patch
+ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch
+sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch
+bpf-pull-before-calling-skb_postpull_rcsum.patch
+nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch
+net-hns3-add-interrupts-re-initialization-while-doin.patch
+net-sched-fix-memory-leak-in-tcindex_set_parms.patch
+qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch
+nfc-fix-potential-resource-leaks.patch
+vhost-fix-range-used-in-translate_desc.patch
+net-amd-xgbe-add-missed-tasklet_kill.patch
+net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch
+rdma-uverbs-silence-shifttoomanybitssigned-warning.patch
+rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch
+net-sched-atm-dont-intepret-cls-results-when-asked-t.patch
+net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch
+perf-tools-fix-resources-leak-in-perf_data__open_dir.patch
+drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch
+usb-rndis_host-secure-rndis_query-check-against-int-.patch
+drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch
+caif-fix-memory-leak-in-cfctrl_linkup_request.patch
+udf-fix-extension-of-the-last-extent-in-the-file.patch
+asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch
--- /dev/null
+From e23beb280a5642b830a0c1c0657730f9158bd0de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:14:31 +0900
+Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall
+
+From: minoura makoto <minoura@valinux.co.jp>
+
+[ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ]
+
+Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid
+but different gss service") introduced `auth` argument to
+__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
+since it (and auth->service) was not (yet) determined.
+
+When multiple upcalls with the same uid and different service are
+ongoing, it could happen that __gss_find_upcall(), which returns the
+first match found in the pipe->in_downcall list, could not find the
+correct gss_msg corresponding to the downcall we are looking for.
+Moreover, it might return a msg which is not sent to rpc.gssd yet.
+
+We could see mount.nfs process hung in D state with multiple mount.nfs
+are executed in parallel. The call trace below is of CentOS 7.9
+kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
+elrepo kernel-ml-6.0.7-1.el7.
+
+PID: 71258 TASK: ffff91ebd4be0000 CPU: 36 COMMAND: "mount.nfs"
+ #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
+ #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
+ #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
+ #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
+[sunrpc]
+ #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
+ #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
+ #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
+ #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
+ #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
+ #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]
+
+The scenario is like this. Let's say there are two upcalls for
+services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.
+
+When rpc.gssd reads pipe to get the upcall msg corresponding to
+service B from pipe->pipe and then writes the response, in
+gss_pipe_downcall the msg corresponding to service A will be picked
+because only uid is used to find the msg and it is before the one for
+B in pipe->in_downcall. And the process waiting for the msg
+corresponding to service A will be woken up.
+
+Actual scheduing of that process might be after rpc.gssd processes the
+next msg. In rpc_pipe_generic_upcall it clears msg->errno (for A).
+The process is scheduled to see gss_msg->ctx == NULL and
+gss_msg->msg.errno == 0, therefore it cannot break the loop in
+gss_create_upcall and is never woken up after that.
+
+This patch adds a simple check to ensure that a msg which is not
+sent to rpc.gssd yet is not chosen as the matching upcall upon
+receiving a downcall.
+
+Signed-off-by: minoura makoto <minoura@valinux.co.jp>
+Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Cc: Trond Myklebust <trondmy@hammerspace.com>
+Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++
+ net/sunrpc/auth_gss/auth_gss.c | 19 +++++++++++++++++--
+ 2 files changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
+index e90b9bd99ded..396de2ef8767 100644
+--- a/include/linux/sunrpc/rpc_pipe_fs.h
++++ b/include/linux/sunrpc/rpc_pipe_fs.h
+@@ -94,6 +94,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
+ char __user *, size_t);
+ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
+
++/* returns true if the msg is in-flight, i.e., already eaten by the peer */
++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
++ return (msg->copied != 0 && list_empty(&msg->list));
++}
++
+ struct rpc_clnt;
+ extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
+ extern int rpc_remove_client_dir(struct rpc_clnt *);
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index b7a71578bd98..4d3cf146f50a 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -301,7 +301,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+- if (auth && pos->auth->service != auth->service)
++ if (pos->auth->service != auth->service)
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+@@ -683,6 +683,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ return err;
+ }
+
++static struct gss_upcall_msg *
++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
++{
++ struct gss_upcall_msg *pos;
++ list_for_each_entry(pos, &pipe->in_downcall, list) {
++ if (!uid_eq(pos->uid, uid))
++ continue;
++ if (!rpc_msg_is_inflight(&pos->msg))
++ continue;
++ refcount_inc(&pos->count);
++ return pos;
++ }
++ return NULL;
++}
++
+ #define MSG_BUF_MAXSIZE 1024
+
+ static ssize_t
+@@ -729,7 +744,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ err = -ENOENT;
+ /* Find a matching upcall */
+ spin_lock(&pipe->lock);
+- gss_msg = __gss_find_upcall(pipe, uid, NULL);
++ gss_msg = gss_find_downcall(pipe, uid);
+ if (gss_msg == NULL) {
+ spin_unlock(&pipe->lock);
+ goto err_put_ctx;
+--
+2.35.1
+
--- /dev/null
+From 6c5a87ab8ba62c5768392044c4c0101d1390ee5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 17:45:51 +0100
+Subject: udf: Fix extension of the last extent in the file
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ]
+
+When extending the last extent in the file within the last block, we
+wrongly computed the length of the last extent. This is mostly a
+cosmetical problem since the extent does not contain any data and the
+length will be fixed up by following operations but still.
+
+Fixes: 1f3868f06855 ("udf: Fix extending file within last block")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index f6bbf395ce07..37a6bbd5a19c 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -602,7 +602,7 @@ static void udf_do_extend_final_block(struct inode *inode,
+ */
+ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
+ return;
+- added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen;
++ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes;
+
+--
+2.35.1
+
--- /dev/null
+From 2864f82d070bdf818bc8ee3f022e224f8f2f1540 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 10:17:09 +0100
+Subject: usb: rndis_host: Secure rndis_query check against int overflow
+
+From: Szymon Heidrich <szymon.heidrich@gmail.com>
+
+[ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ]
+
+Variables off and len typed as uint32 in rndis_query function
+are controlled by incoming RNDIS response message thus their
+value may be manipulated. Setting off to a unexpectetly large
+value will cause the sum with len and 8 to overflow and pass
+the implemented validation step. Consequently the response
+pointer will be referring to a location past the expected
+buffer boundaries allowing information leakage e.g. via
+RNDIS_OID_802_3_PERMANENT_ADDRESS OID.
+
+Fixes: ddda08624013 ("USB: rndis_host, various cleanups")
+Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/rndis_host.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
+index 1505fe3f87ed..1ff723e15d52 100644
+--- a/drivers/net/usb/rndis_host.c
++++ b/drivers/net/usb/rndis_host.c
+@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
+
+ off = le32_to_cpu(u.get_c->offset);
+ len = le32_to_cpu(u.get_c->len);
+- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE))
++ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) ||
++ (len > CONTROL_BUFFER_SIZE - 8 - off)))
+ goto response_error;
+
+ if (*reply_len != -1 && len != *reply_len)
+--
+2.35.1
+
--- /dev/null
+From 09be1b540fae534164adeefb63cf9f0e419e02c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 11:25:03 +0100
+Subject: vhost: fix range used in translate_desc()
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ]
+
+vhost_iotlb_itree_first() requires `start` and `last` parameters
+to search for a mapping that overlaps the range.
+
+In translate_desc() we cyclically call vhost_iotlb_itree_first(),
+incrementing `addr` by the amount already translated, so rightly
+we move the `start` parameter passed to vhost_iotlb_itree_first(),
+but we should hold the `last` parameter constant.
+
+Let's fix it by saving the `last` parameter value before incrementing
+`addr` in the loop.
+
+Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109102503.18816-3-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vhost.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index 97be299f0a8d..fdfa399700fe 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -2050,7 +2050,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+ struct vhost_dev *dev = vq->dev;
+ struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem;
+ struct iovec *_iov;
+- u64 s = 0;
++ u64 s = 0, last = addr + len - 1;
+ int ret = 0;
+
+ while ((u64)len > s) {
+@@ -2061,7 +2061,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+ }
+
+ node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
+- addr, addr + len - 1);
++ addr, last);
+ if (node == NULL || node->start > addr) {
+ if (umem != dev->iotlb) {
+ ret = -EFAULT;
+--
+2.35.1
+