From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 27 Aug 2023 06:50:45 +0000 (+0200)
Subject: 6.4-stable patches
X-Git-Tag: v6.1.49~39
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5628542b8f8bb53a88b1e9d1a01709e591cde470;p=thirdparty%2Fkernel%2Fstable-queue.git

6.4-stable patches

added patches:
	drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch
	drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch
	drm-i915-dgfx-enable-d3cold-at-s2idle.patch
	drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch
	drm-panfrost-skip-speed-binning-on-eopnotsupp.patch
	drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch
	drm-vmwgfx-fix-shader-stage-validation.patch
	loongarch-fix-hw_breakpoint_control-for-watchpoints.patch
	x86-fpu-invalidate-fpu-state-correctly-on-exec.patch
	x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch
---

diff --git a/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch b/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch
new file mode 100644
index 00000000000..f5d10ccb72c
--- /dev/null
+++ b/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch
@@ -0,0 +1,144 @@
+From a94e7ccfc400c024976f3c2f31689ed843498b7c Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 22 Aug 2023 14:30:14 +0300
+Subject: drm: Add an HPD poll helper to reschedule the poll work
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit a94e7ccfc400c024976f3c2f31689ed843498b7c upstream.
+
+Add a helper to reschedule drm_mode_config::output_poll_work after
+polling has been enabled for a connector (and needing a reschedule,
+since previously polling was disabled for all connectors and hence
+output_poll_work was not running).
+
+This is needed by the next patch fixing HPD polling on i915.
+
+CC: stable@vger.kernel.org # 6.4+
+Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Cc: dri-devel@lists.freedesktop.org
+Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-1-imre.deak@intel.com
+(cherry picked from commit fe2352fd64029918174de4b460dfe6df0c6911cd)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_probe_helper.c | 68 ++++++++++++++++++++----------
+ include/drm/drm_probe_helper.h     |  1 +
+ 2 files changed, 47 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
+index 2fb9bf901a2c..3f479483d7d8 100644
+--- a/drivers/gpu/drm/drm_probe_helper.c
++++ b/drivers/gpu/drm/drm_probe_helper.c
+@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
+ }
+ 
+ #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
++static void reschedule_output_poll_work(struct drm_device *dev)
++{
++	unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
++
++	if (dev->mode_config.delayed_event)
++		/*
++		 * FIXME:
++		 *
++		 * Use short (1s) delay to handle the initial delayed event.
++		 * This delay should not be needed, but Optimus/nouveau will
++		 * fail in a mysterious way if the delayed event is handled as
++		 * soon as possible like it is done in
++		 * drm_helper_probe_single_connector_modes() in case the poll
++		 * was enabled before.
++		 */
++		delay = HZ;
++
++	schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
++}
++
+ /**
+  * drm_kms_helper_poll_enable - re-enable output polling.
+  * @dev: drm_device
+@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
+  */
+ void drm_kms_helper_poll_enable(struct drm_device *dev)
+ {
+-	bool poll = false;
+-	unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+-
+ 	if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
+ 	    dev->mode_config.poll_running)
+ 		return;
+ 
+-	poll = drm_kms_helper_enable_hpd(dev);
+-
+-	if (dev->mode_config.delayed_event) {
+-		/*
+-		 * FIXME:
+-		 *
+-		 * Use short (1s) delay to handle the initial delayed event.
+-		 * This delay should not be needed, but Optimus/nouveau will
+-		 * fail in a mysterious way if the delayed event is handled as
+-		 * soon as possible like it is done in
+-		 * drm_helper_probe_single_connector_modes() in case the poll
+-		 * was enabled before.
+-		 */
+-		poll = true;
+-		delay = HZ;
+-	}
+-
+-	if (poll)
+-		schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
++	if (drm_kms_helper_enable_hpd(dev) ||
++	    dev->mode_config.delayed_event)
++		reschedule_output_poll_work(dev);
+ 
+ 	dev->mode_config.poll_running = true;
+ }
+ EXPORT_SYMBOL(drm_kms_helper_poll_enable);
+ 
++/**
++ * drm_kms_helper_poll_reschedule - reschedule the output polling work
++ * @dev: drm_device
++ *
++ * This function reschedules the output polling work, after polling for a
++ * connector has been enabled.
++ *
++ * Drivers must call this helper after enabling polling for a connector by
++ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that disabling polling by clearing these
++ * flags for a connector will stop the output polling work automatically if
++ * the polling is disabled for all other connectors as well.
++ *
++ * The function can be called only after polling has been enabled by calling
++ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
++ */
++void drm_kms_helper_poll_reschedule(struct drm_device *dev)
++{
++	if (dev->mode_config.poll_running)
++		reschedule_output_poll_work(dev);
++}
++EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
++
+ static enum drm_connector_status
+ drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
+ {
+diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h
+index 4977e0ab72db..fad3c4003b2b 100644
+--- a/include/drm/drm_probe_helper.h
++++ b/include/drm/drm_probe_helper.h
+@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
+ 
+ void drm_kms_helper_poll_disable(struct drm_device *dev);
+ void drm_kms_helper_poll_enable(struct drm_device *dev);
++void drm_kms_helper_poll_reschedule(struct drm_device *dev);
+ bool drm_kms_helper_is_poll_worker(void);
+ 
+ enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,
+-- 
+2.42.0
+
diff --git a/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch b/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch
new file mode 100644
index 00000000000..d1d8be7d325
--- /dev/null
+++ b/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch
@@ -0,0 +1,37 @@
+From 5ad1ab30ac0809d2963ddcf39ac34317a24a2f17 Mon Sep 17 00:00:00 2001
+From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Date: Fri, 18 Aug 2023 10:14:36 +0530
+Subject: drm/display/dp: Fix the DP DSC Receiver cap size
+
+From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+
+commit 5ad1ab30ac0809d2963ddcf39ac34317a24a2f17 upstream.
+
+DP DSC Receiver Capabilities are exposed via DPCD 60h-6Fh.
+Fix the DSC RECEIVER CAP SIZE accordingly.
+
+Fixes: ffddc4363c28 ("drm/dp: Add DP DSC DPCD receiver capability size define and missing SHIFT")
+Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
+Cc: Manasi Navare <manasi.d.navare@intel.com>
+Cc: <stable@vger.kernel.org> # v5.0+
+
+Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
+Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230818044436.177806-1-ankit.k.nautiyal@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/drm/display/drm_dp.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/drm/display/drm_dp.h
++++ b/include/drm/display/drm_dp.h
+@@ -1534,7 +1534,7 @@ enum drm_dp_phy {
+ 
+ #define DP_BRANCH_OUI_HEADER_SIZE	0xc
+ #define DP_RECEIVER_CAP_SIZE		0xf
+-#define DP_DSC_RECEIVER_CAP_SIZE        0xf
++#define DP_DSC_RECEIVER_CAP_SIZE        0x10 /* DSC Capabilities 0x60 through 0x6F */
+ #define EDP_PSR_RECEIVER_CAP_SIZE	2
+ #define EDP_DISPLAY_CTL_CAP_SIZE	3
+ #define DP_LTTPR_COMMON_CAP_SIZE	8
diff --git a/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch b/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch
new file mode 100644
index 00000000000..4375f8b6495
--- /dev/null
+++ b/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch
@@ -0,0 +1,127 @@
+From 2872144aec04baa7e43ecd2a60f7f0be3aa843fd Mon Sep 17 00:00:00 2001
+From: Anshuman Gupta <anshuman.gupta@intel.com>
+Date: Wed, 16 Aug 2023 18:22:16 +0530
+Subject: drm/i915/dgfx: Enable d3cold at s2idle
+
+From: Anshuman Gupta <anshuman.gupta@intel.com>
+
+commit 2872144aec04baa7e43ecd2a60f7f0be3aa843fd upstream.
+
+System wide suspend already has support for lmem save/restore during
+suspend, so enable d3cold for s2idle and keep it disabled for runtime PM.
+(Refer to the commit below for the d3cold runtime PM disable justification.)
+'commit 66eb93e71a7a ("drm/i915/dgfx: Keep PCI autosuspend control
+'on' by default on all dGPU")'
+
+It will reduce the DG2 Card power consumption to ~0 Watt
+for s2idle power KPI.
+
+v2:
+- Added "Cc: stable@vger.kernel.org".
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8755
+Cc: stable@vger.kernel.org
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
+Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Tested-by: Aaron Ma <aaron.ma@canonical.com>
+Tested-by: Jianshui Yu <Jianshui.yu@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230816125216.1722002-1-anshuman.gupta@intel.com
+(cherry picked from commit 2643e6d1f2a5e51877be24042d53cf956589be10)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_driver.c |   33 ++++++++++++++++++---------------
+ 1 file changed, 18 insertions(+), 15 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_driver.c
++++ b/drivers/gpu/drm/i915/i915_driver.c
+@@ -433,7 +433,6 @@ static int i915_pcode_init(struct drm_i9
+ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+-	struct pci_dev *root_pdev;
+ 	int ret;
+ 
+ 	if (i915_inject_probe_failure(dev_priv))
+@@ -547,15 +546,6 @@ static int i915_driver_hw_probe(struct d
+ 
+ 	intel_bw_init_hw(dev_priv);
+ 
+-	/*
+-	 * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+-	 * This should be totally removed when we handle the pci states properly
+-	 * on runtime PM and on s2idle cases.
+-	 */
+-	root_pdev = pcie_find_root_port(pdev);
+-	if (root_pdev)
+-		pci_d3cold_disable(root_pdev);
+-
+ 	return 0;
+ 
+ err_opregion:
+@@ -581,7 +571,6 @@ err_perf:
+ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+-	struct pci_dev *root_pdev;
+ 
+ 	i915_perf_fini(dev_priv);
+ 
+@@ -589,10 +578,6 @@ static void i915_driver_hw_remove(struct
+ 
+ 	if (pdev->msi_enabled)
+ 		pci_disable_msi(pdev);
+-
+-	root_pdev = pcie_find_root_port(pdev);
+-	if (root_pdev)
+-		pci_d3cold_enable(root_pdev);
+ }
+ 
+ /**
+@@ -1499,6 +1484,8 @@ static int intel_runtime_suspend(struct
+ {
+ 	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
+ 	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
++	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
++	struct pci_dev *root_pdev;
+ 	struct intel_gt *gt;
+ 	int ret, i;
+ 
+@@ -1550,6 +1537,15 @@ static int intel_runtime_suspend(struct
+ 		drm_err(&dev_priv->drm,
+ 			"Unclaimed access detected prior to suspending\n");
+ 
++	/*
++	 * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
++	 * This should be totally removed when we handle the pci states properly
++	 * on runtime PM.
++	 */
++	root_pdev = pcie_find_root_port(pdev);
++	if (root_pdev)
++		pci_d3cold_disable(root_pdev);
++
+ 	rpm->suspended = true;
+ 
+ 	/*
+@@ -1588,6 +1584,8 @@ static int intel_runtime_resume(struct d
+ {
+ 	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
+ 	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
++	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
++	struct pci_dev *root_pdev;
+ 	struct intel_gt *gt;
+ 	int ret, i;
+ 
+@@ -1601,6 +1599,11 @@ static int intel_runtime_resume(struct d
+ 
+ 	intel_opregion_notify_adapter(dev_priv, PCI_D0);
+ 	rpm->suspended = false;
++
++	root_pdev = pcie_find_root_port(pdev);
++	if (root_pdev)
++		pci_d3cold_enable(root_pdev);
++
+ 	if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
+ 		drm_dbg(&dev_priv->drm,
+ 			"Unclaimed access during suspend, bios?\n");
diff --git a/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch b/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch
new file mode 100644
index 00000000000..b924ad3a40e
--- /dev/null
+++ b/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch
@@ -0,0 +1,59 @@
+From 1dcc437427bbcebc8381226352f7ade08a271191 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 22 Aug 2023 14:30:15 +0300
+Subject: drm/i915: Fix HPD polling, reenabling the output poll work as needed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 1dcc437427bbcebc8381226352f7ade08a271191 upstream.
+
+After the commit in the Fixes: line below, HPD polling stopped working
+on i915, since after that change calling drm_kms_helper_poll_enable()
+doesn't restart drm_mode_config::output_poll_work if the work was
+stopped (no connectors needing polling) and enabling polling for a
+connector (during runtime suspend or detecting an HPD IRQ storm).
+
+After the above change calling drm_kms_helper_poll_enable() is a nop
+after it's been called already and polling for some connectors was
+disabled/re-enabled.
+
+Fix this by calling drm_kms_helper_poll_reschedule() added in the
+previous patch instead, which reschedules the work whenever expected.
+
+Fixes: d33a54e3991d ("drm/probe_helper: sort out poll_running vs poll_enabled")
+CC: stable@vger.kernel.org # 6.4+
+Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Cc: dri-devel@lists.freedesktop.org
+Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-2-imre.deak@intel.com
+(cherry picked from commit 50452f2f76852322620b63e62922b85e955abe94)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_hotplug.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
++++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
+@@ -210,7 +210,7 @@ intel_hpd_irq_storm_switch_to_polling(st
+ 
+ 	/* Enable polling and queue hotplug re-enabling. */
+ 	if (hpd_disabled) {
+-		drm_kms_helper_poll_enable(&dev_priv->drm);
++		drm_kms_helper_poll_reschedule(&dev_priv->drm);
+ 		mod_delayed_work(system_wq, &dev_priv->display.hotplug.reenable_work,
+ 				 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
+ 	}
+@@ -644,7 +644,7 @@ static void i915_hpd_poll_init_work(stru
+ 	drm_connector_list_iter_end(&conn_iter);
+ 
+ 	if (enabled)
+-		drm_kms_helper_poll_enable(&dev_priv->drm);
++		drm_kms_helper_poll_reschedule(&dev_priv->drm);
+ 
+ 	mutex_unlock(&dev_priv->drm.mode_config.mutex);
+ 
diff --git a/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch b/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch
new file mode 100644
index 00000000000..aa2eba2d02c
--- /dev/null
+++ b/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch
@@ -0,0 +1,40 @@
+From f19df6e4de64b7fc6d71f192aa9ff3b701e4bade Mon Sep 17 00:00:00 2001
+From: David Michael <fedora.dm0@gmail.com>
+Date: Tue, 15 Aug 2023 21:42:41 -0400
+Subject: drm/panfrost: Skip speed binning on EOPNOTSUPP
+
+From: David Michael <fedora.dm0@gmail.com>
+
+commit f19df6e4de64b7fc6d71f192aa9ff3b701e4bade upstream.
+
+Encountered on an ARM Mali-T760 MP4, attempting to read the nvmem
+variable can also return EOPNOTSUPP instead of ENOENT when speed
+binning is unsupported.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 7d690f936e9b ("drm/panfrost: Add basic support for speed binning")
+Signed-off-by: David Michael <fedora.dm0@gmail.com>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Signed-off-by: Steven Price <steven.price@arm.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/87msyryd7y.fsf@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_devfreq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+index 58dfb15a8757..e78de99e9933 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
++++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev)
+ 		 * keep going without it; any other error means that we are
+ 		 * supposed to read the bin value, but we failed doing so.
+ 		 */
+-		if (ret != -ENOENT) {
++		if (ret != -ENOENT && ret != -EOPNOTSUPP) {
+ 			DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret);
+ 			return ret;
+ 		}
+-- 
+2.42.0
+
diff --git a/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch b/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch
new file mode 100644
index 00000000000..41207127357
--- /dev/null
+++ b/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch
@@ -0,0 +1,137 @@
+From f9e96bf1905479f18e83a3a4c314a8dfa56ede2c Mon Sep 17 00:00:00 2001
+From: Zack Rusin <zackr@vmware.com>
+Date: Fri, 18 Aug 2023 00:13:01 -0400
+Subject: drm/vmwgfx: Fix possible invalid drm gem put calls
+
+From: Zack Rusin <zackr@vmware.com>
+
+commit f9e96bf1905479f18e83a3a4c314a8dfa56ede2c upstream.
+
+vmw_bo_unreference sets the input buffer to null on exit, resulting in
+null ptr deref's on the subsequent drm gem put calls.
+
+This went unnoticed because only very old userspace would be exercising
+those paths but it wouldn't be hard to hit on old distros with brand
+new kernels.
+
+Introduce a new function that abstracts unrefing of user bo's to make
+the code cleaner and more explicit.
+
+Signed-off-by: Zack Rusin <zackr@vmware.com>
+Reported-by: Ian Forbes <iforbes@vmware.com>
+Fixes: 9ef8d83e8e25 ("drm/vmwgfx: Do not drop the reference to the handle too soon")
+Cc: <stable@vger.kernel.org> # v6.4+
+Reviewed-by: Maaz Mombasawala <mombasawalam@vmware.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230818041301.407636-1-zack@kde.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      |    6 ++----
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.h      |    8 ++++++++
+ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |    6 ++----
+ drivers/gpu/drm/vmwgfx/vmwgfx_kms.c     |    6 ++----
+ drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |    3 +--
+ drivers/gpu/drm/vmwgfx/vmwgfx_shader.c  |    3 +--
+ 6 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(s
+ 		if (!(flags & drm_vmw_synccpu_allow_cs)) {
+ 			atomic_dec(&vmw_bo->cpu_writers);
+ 		}
+-		ttm_bo_put(&vmw_bo->tbo);
++		vmw_user_bo_unref(vmw_bo);
+ 	}
+ 
+-	drm_gem_object_put(&vmw_bo->tbo.base);
+ 	return ret;
+ }
+ 
+@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm
+ 			return ret;
+ 
+ 		ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
+-		vmw_bo_unreference(&vbo);
+-		drm_gem_object_put(&vbo->tbo.base);
++		vmw_user_bo_unref(vbo);
+ 		if (unlikely(ret != 0)) {
+ 			if (ret == -ERESTARTSYS || ret == -EBUSY)
+ 				return -EBUSY;
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_refe
+ 	return buf;
+ }
+ 
++static inline void vmw_user_bo_unref(struct vmw_bo *vbo)
++{
++	if (vbo) {
++		ttm_bo_put(&vbo->tbo);
++		drm_gem_object_put(&vbo->tbo.base);
++	}
++}
++
+ static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj)
+ {
+ 	return container_of((gobj), struct vmw_bo, tbo.base);
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct
+ 	}
+ 	vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
+ 	ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
+-	ttm_bo_put(&vmw_bo->tbo);
+-	drm_gem_object_put(&vmw_bo->tbo.base);
++	vmw_user_bo_unref(vmw_bo);
+ 	if (unlikely(ret != 0))
+ 		return ret;
+ 
+@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struc
+ 	vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
+ 			     VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
+ 	ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
+-	ttm_bo_put(&vmw_bo->tbo);
+-	drm_gem_object_put(&vmw_bo->tbo.base);
++	vmw_user_bo_unref(vmw_bo);
+ 	if (unlikely(ret != 0))
+ 		return ret;
+ 
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_f
+ 
+ err_out:
+ 	/* vmw_user_lookup_handle takes one ref so does new_fb */
+-	if (bo) {
+-		vmw_bo_unreference(&bo);
+-		drm_gem_object_put(&bo->tbo.base);
+-	}
++	if (bo)
++		vmw_user_bo_unref(bo);
+ 	if (surface)
+ 		vmw_surface_unreference(&surface);
+ 
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device
+ 
+ 	ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
+ 
+-	vmw_bo_unreference(&buf);
+-	drm_gem_object_put(&buf->tbo.base);
++	vmw_user_bo_unref(buf);
+ 
+ out_unlock:
+ 	mutex_unlock(&overlay->mutex);
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_
+ 				    shader_type, num_input_sig,
+ 				    num_output_sig, tfile, shader_handle);
+ out_bad_arg:
+-	vmw_bo_unreference(&buffer);
+-	drm_gem_object_put(&buffer->tbo.base);
++	vmw_user_bo_unref(buffer);
+ 	return ret;
+ }
+ 
diff --git a/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch b/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch
new file mode 100644
index 00000000000..0c6e9b0511b
--- /dev/null
+++ b/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch
@@ -0,0 +1,175 @@
+From 14abdfae508228a7307f7491b5c4215ae70c6542 Mon Sep 17 00:00:00 2001
+From: Zack Rusin <zackr@vmware.com>
+Date: Fri, 16 Jun 2023 15:09:34 -0400
+Subject: drm/vmwgfx: Fix shader stage validation
+
+From: Zack Rusin <zackr@vmware.com>
+
+commit 14abdfae508228a7307f7491b5c4215ae70c6542 upstream.
+
+For multiple commands the driver was not correctly validating the shader
+stages resulting in possible kernel oopses. The validation code was only,
+if ever, checking the upper bound on the shader stages but never a lower
+bound (valid shader stages start at 1 not 0).
+
+Fixes kernel oopses ending up in vmw_binding_add, e.g.:
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 1 PID: 2443 Comm: testcase Not tainted 6.3.0-rc4-vmwgfx #1
+Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
+RIP: 0010:vmw_binding_add+0x4c/0x140 [vmwgfx]
+Code: 7e 30 49 83 ff 0e 0f 87 ea 00 00 00 4b 8d 04 7f 89 d2 89 cb 48 c1 e0 03 4c 8b b0 40 3d 93 c0 48 8b 80 48 3d 93 c0 49 0f af de <48> 03 1c d0 4c 01 e3 49 8>
+RSP: 0018:ffffb8014416b968 EFLAGS: 00010206
+RAX: ffffffffc0933ec0 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 00000000ffffffff RSI: ffffb8014416b9c0 RDI: ffffb8014316f000
+RBP: ffffb8014416b998 R08: 0000000000000003 R09: 746f6c735f726564
+R10: ffffffffaaf2bda0 R11: 732e676e69646e69 R12: ffffb8014316f000
+R13: ffffb8014416b9c0 R14: 0000000000000040 R15: 0000000000000006
+FS:  00007fba8c0af740(0000) GS:ffff8a1277c80000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000007c0933eb8 CR3: 0000000118244001 CR4: 00000000003706e0
+Call Trace:
+ <TASK>
+ vmw_view_bindings_add+0xf5/0x1b0 [vmwgfx]
+ ? ___drm_dbg+0x8a/0xb0 [drm]
+ vmw_cmd_dx_set_shader_res+0x8f/0xc0 [vmwgfx]
+ vmw_execbuf_process+0x590/0x1360 [vmwgfx]
+ vmw_execbuf_ioctl+0x173/0x370 [vmwgfx]
+ ? __drm_dev_dbg+0xb4/0xe0 [drm]
+ ? __pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx]
+ drm_ioctl_kernel+0xbc/0x160 [drm]
+ drm_ioctl+0x2d2/0x580 [drm]
+ ? __pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx]
+ ? do_fault+0x1a6/0x420
+ vmw_generic_ioctl+0xbd/0x180 [vmwgfx]
+ vmw_unlocked_ioctl+0x19/0x20 [vmwgfx]
+ __x64_sys_ioctl+0x96/0xd0
+ do_syscall_64+0x5d/0x90
+ ? handle_mm_fault+0xe4/0x2f0
+ ? debug_smp_processor_id+0x1b/0x30
+ ? fpregs_assert_state_consistent+0x2e/0x50
+ ? exit_to_user_mode_prepare+0x40/0x180
+ ? irqentry_exit_to_user_mode+0xd/0x20
+ ? irqentry_exit+0x3f/0x50
+ ? exc_page_fault+0x8b/0x180
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+Signed-off-by: Zack Rusin <zackr@vmware.com>
+Cc: security@openanolis.org
+Reported-by: Ziming Zhang <ezrakiez@gmail.com>
+Testcase-found-by: Niels De Graef <ndegraef@redhat.com>
+Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support")
+Cc: <stable@vger.kernel.org> # v4.3+
+Reviewed-by: Maaz Mombasawala <mombasawalam@vmware.com>
+Reviewed-by: Martin Krastev <krastevm@vmware.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230616190934.54828-1-zack@kde.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     |   12 ++++++++++++
+ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |   29 +++++++++++------------------
+ 2 files changed, 23 insertions(+), 18 deletions(-)
+
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct
+ 	return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
+ }
+ 
++static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
++					   u32 shader_type)
++{
++	SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
++
++	if (shader_model >= VMW_SM_5)
++		max_allowed = SVGA3D_SHADERTYPE_MAX;
++	else if (shader_model >= VMW_SM_4)
++		max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
++	return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
++}
++
+ #endif
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -1992,7 +1992,7 @@ static int vmw_cmd_set_shader(struct vmw
+ 
+ 	cmd = container_of(header, typeof(*cmd), header);
+ 
+-	if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
++	if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
+ 		VMW_DEBUG_USER("Illegal shader type %u.\n",
+ 			       (unsigned int) cmd->body.type);
+ 		return -EINVAL;
+@@ -2115,8 +2115,6 @@ vmw_cmd_dx_set_single_constant_buffer(st
+ 				      SVGA3dCmdHeader *header)
+ {
+ 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
+-	SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
+-		SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
+ 
+ 	struct vmw_resource *res = NULL;
+ 	struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
+@@ -2133,6 +2131,14 @@ vmw_cmd_dx_set_single_constant_buffer(st
+ 	if (unlikely(ret != 0))
+ 		return ret;
+ 
++	if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
++	    cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
++		VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
++			       (unsigned int) cmd->body.type,
++			       (unsigned int) cmd->body.slot);
++		return -EINVAL;
++	}
++
+ 	binding.bi.ctx = ctx_node->ctx;
+ 	binding.bi.res = res;
+ 	binding.bi.bt = vmw_ctx_binding_cb;
+@@ -2141,14 +2147,6 @@ vmw_cmd_dx_set_single_constant_buffer(st
+ 	binding.size = cmd->body.sizeInBytes;
+ 	binding.slot = cmd->body.slot;
+ 
+-	if (binding.shader_slot >= max_shader_num ||
+-	    binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+-		VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+-			       (unsigned int) cmd->body.type,
+-			       (unsigned int) binding.slot);
+-		return -EINVAL;
+-	}
+-
+ 	vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
+ 			binding.slot);
+ 
+@@ -2207,15 +2205,13 @@ static int vmw_cmd_dx_set_shader_res(str
+ {
+ 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
+ 		container_of(header, typeof(*cmd), header);
+-	SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
+-		SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
+ 
+ 	u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
+ 		sizeof(SVGA3dShaderResourceViewId);
+ 
+ 	if ((u64) cmd->body.startView + (u64) num_sr_view >
+ 	    (u64) SVGA3D_DX_MAX_SRVIEWS ||
+-	    cmd->body.type >= max_allowed) {
++	    !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
+ 		VMW_DEBUG_USER("Invalid shader binding.\n");
+ 		return -EINVAL;
+ 	}
+@@ -2239,8 +2235,6 @@ static int vmw_cmd_dx_set_shader(struct
+ 				 SVGA3dCmdHeader *header)
+ {
+ 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
+-	SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
+-		SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
+ 	struct vmw_resource *res = NULL;
+ 	struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
+ 	struct vmw_ctx_bindinfo_shader binding;
+@@ -2251,8 +2245,7 @@ static int vmw_cmd_dx_set_shader(struct
+ 
+ 	cmd = container_of(header, typeof(*cmd), header);
+ 
+-	if (cmd->body.type >= max_allowed ||
+-	    cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
++	if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
+ 		VMW_DEBUG_USER("Illegal shader type %u.\n",
+ 			       (unsigned int) cmd->body.type);
+ 		return -EINVAL;
diff --git a/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch b/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch
new file mode 100644
index 00000000000..d02d1225551
--- /dev/null
+++ b/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch
@@ -0,0 +1,34 @@
+From 9730870b484e9de852b51df08a8b357b1129489e Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Sat, 26 Aug 2023 22:21:57 +0800
+Subject: LoongArch: Fix hw_breakpoint_control() for watchpoints
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit 9730870b484e9de852b51df08a8b357b1129489e upstream.
+
+In hw_breakpoint_control(), encode_ctrl_reg() has already encoded the
+MWPnCFG3_LoadEn/MWPnCFG3_StoreEn bits in info->ctrl. We don't need to
+add (1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn) unconditionally.
+
+Otherwise we can't set read watchpoint and write watchpoint separately.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/hw_breakpoint.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/loongarch/kernel/hw_breakpoint.c
++++ b/arch/loongarch/kernel/hw_breakpoint.c
+@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct
+ 			write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+ 		} else {
+ 			ctrl = encode_ctrl_reg(info->ctrl);
+-			write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
+-				     1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
++			write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
+ 		}
+ 		enable = csr_read64(LOONGARCH_CSR_CRMD);
+ 		csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
diff --git a/queue-6.4/series b/queue-6.4/series
index 11f5bb2ff2e..97867061793 100644
--- a/queue-6.4/series
+++ b/queue-6.4/series
@@ -64,6 +64,16 @@ selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch
 mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch
 mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch
 mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch
+drm-vmwgfx-fix-shader-stage-validation.patch
+drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch
+drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch
+drm-panfrost-skip-speed-binning-on-eopnotsupp.patch
+drm-i915-dgfx-enable-d3cold-at-s2idle.patch
+drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch
+drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch
+loongarch-fix-hw_breakpoint_control-for-watchpoints.patch
+x86-fpu-invalidate-fpu-state-correctly-on-exec.patch
+x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch
 mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch
 mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch
 mm-multi-gen-lru-don-t-spin-during-memcg-release.patch
diff --git a/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch b/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch
new file mode 100644
index 00000000000..5cf755333c8
--- /dev/null
+++ b/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch
@@ -0,0 +1,135 @@
+From 1f69383b203e28cf8a4ca9570e572da1699f76cd Mon Sep 17 00:00:00 2001
+From: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Date: Fri, 18 Aug 2023 10:03:05 -0700
+Subject: x86/fpu: Invalidate FPU state correctly on exec()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rick Edgecombe <rick.p.edgecombe@intel.com>
+
+commit 1f69383b203e28cf8a4ca9570e572da1699f76cd upstream.
+
+The thread flag TIF_NEED_FPU_LOAD indicates that the FPU saved state is
+valid and should be reloaded when returning to userspace. However, the
+kernel will skip doing this if the FPU registers are already valid as
+determined by fpregs_state_valid(). The logic embedded there considers
+the state valid if two cases are both true:
+
+  1: fpu_fpregs_owner_ctx points to the current tasks FPU state
+  2: the last CPU the registers were live in was the current CPU.
+
+This is usually correct logic. A CPU’s fpu_fpregs_owner_ctx is set to
+the current FPU during the fpregs_restore_userregs() operation, so it
+indicates that the registers have been restored on this CPU. But this
+alone doesn’t preclude that the task hasn’t been rescheduled to a
+different CPU, where the registers were modified, and then back to the
+current CPU. To verify that this was not the case the logic relies on the
+second condition. So the assumption is that if the registers have been
+restored, AND they haven’t had the chance to be modified (by being
+loaded on another CPU), then they MUST be valid on the current CPU.
+
+Besides the lazy FPU optimizations, the other cases where the FPU
+registers might not be valid are when the kernel modifies the FPU register
+state or the FPU saved buffer. In this case the operation modifying the
+FPU state needs to let the kernel know the correspondence has been
+broken. The comment in “arch/x86/kernel/fpu/context.h” has:
+/*
+...
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
+ */
+
+However, this is not completely true. When the kernel modifies the
+registers or saved FPU state, it can only rely on
+__fpu_invalidate_fpregs_state(), which wipes the FPU’s last_cpu
+tracking. The exec path instead relies on fpregs_deactivate(), which sets
+the CPU’s FPU context to NULL. This was observed to fail to restore the
+reset FPU state to the registers when returning to userspace in the
+following scenario:
+
+1. A task is executing in userspace on CPU0
+	- CPU0’s FPU context points to tasks
+	- fpu->last_cpu=CPU0
+
+2. The task exec()’s
+
+3. While in the kernel the task is preempted
+	- CPU0 gets a thread executing in the kernel (such that no other
+		FPU context is activated)
+	- Scheduler sets task’s fpu->last_cpu=CPU0 when scheduling out
+
+4. Task is migrated to CPU1
+
+5. Continuing the exec(), the task gets to
+   fpu_flush_thread()->fpu_reset_fpregs()
+	- Sets CPU1’s fpu context to NULL
+	- Copies the init state to the task’s FPU buffer
+	- Sets TIF_NEED_FPU_LOAD on the task
+
+6. The task reschedules back to CPU0 before completing the exec() and
+   returning to userspace
+	- During the reschedule, scheduler finds TIF_NEED_FPU_LOAD is set
+	- Skips saving the registers and updating task’s fpu->last_cpu,
+	  because TIF_NEED_FPU_LOAD is the canonical source.
+
+7. Now CPU0’s FPU context is still pointing to the task’s, and
+   fpu->last_cpu is still CPU0. So fpregs_state_valid() returns true even
+   though the reset FPU state has not been restored.
+
+So the root cause is that exec() is doing the wrong kind of invalidate. It
+should reset fpu->last_cpu via __fpu_invalidate_fpregs_state(). Further,
+fpu__drop() doesn't really seem appropriate as the task (and FPU) are not
+going away, they are just getting reset as part of an exec. So switch to
+__fpu_invalidate_fpregs_state().
+
+Also, delete the misleading comment that says that either kind of
+invalidate will be enough, because it’s not always the case.
+
+Fixes: 33344368cb08 ("x86/fpu: Clean up the fpu__clear() variants")
+Reported-by: Lei Wang <lei4.wang@intel.com>
+Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Lijun Pan <lijun.pan@intel.com>
+Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
+Acked-by: Lijun Pan <lijun.pan@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230818170305.502891-1-rick.p.edgecombe@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/context.h |    3 +--
+ arch/x86/kernel/fpu/core.c    |    2 +-
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/fpu/context.h
++++ b/arch/x86/kernel/fpu/context.h
+@@ -19,8 +19,7 @@
+  * FPU state for a task MUST let the rest of the kernel know that the
+  * FPU registers are no longer valid for this task.
+  *
+- * Either one of these invalidation functions is enough. Invalidate
+- * a resource you control: CPU if using the CPU for something else
++ * Invalidate a resource you control: CPU if using the CPU for something else
+  * (with preemption disabled), FPU for the current task, or a task that
+  * is prevented from running by the current task.
+  */
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
+ 	struct fpu *fpu = &current->thread.fpu;
+ 
+ 	fpregs_lock();
+-	fpu__drop(fpu);
++	__fpu_invalidate_fpregs_state(fpu);
+ 	/*
+ 	 * This does not change the actual hardware registers. It just
+ 	 * resets the memory image and sets TIF_NEED_FPU_LOAD so a
diff --git a/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch b/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch
new file mode 100644
index 00000000000..cabd006c62f
--- /dev/null
+++ b/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch
@@ -0,0 +1,61 @@
+From 2c66ca3949dc701da7f4c9407f2140ae425683a5 Mon Sep 17 00:00:00 2001
+From: Feng Tang <feng.tang@intel.com>
+Date: Wed, 23 Aug 2023 14:57:47 +0800
+Subject: x86/fpu: Set X86_FEATURE_OSXSAVE feature after enabling OSXSAVE in CR4
+
+From: Feng Tang <feng.tang@intel.com>
+
+commit 2c66ca3949dc701da7f4c9407f2140ae425683a5 upstream.
+
+0-Day found a 34.6% regression in stress-ng's 'af-alg' test case, and
+bisected it to commit b81fac906a8f ("x86/fpu: Move FPU initialization into
+arch_cpu_finalize_init()"), which optimizes the FPU init order, and moves
+the CR4_OSXSAVE enabling into a later place:
+
+   arch_cpu_finalize_init
+       identify_boot_cpu
+	   identify_cpu
+	       generic_identify
+                   get_cpu_cap --> setup cpu capability
+       ...
+       fpu__init_cpu
+           fpu__init_cpu_xstate
+               cr4_set_bits(X86_CR4_OSXSAVE);
+
+As the FPU is not yet initialized the CPU capability setup fails to set
+X86_FEATURE_OSXSAVE. Many security module like 'camellia_aesni_avx_x86_64'
+depend on this feature and therefore fail to load, causing the regression.
+
+Cure this by setting X86_FEATURE_OSXSAVE feature right after OSXSAVE
+enabling.
+
+[ tglx: Moved it into the actual BSP FPU initialization code and added a comment ]
+
+Fixes: b81fac906a8f ("x86/fpu: Move FPU initialization into arch_cpu_finalize_init()")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Signed-off-by: Feng Tang <feng.tang@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/lkml/202307192135.203ac24e-oliver.sang@intel.com
+Link: https://lore.kernel.org/lkml/20230823065747.92257-1-feng.tang@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/xstate.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsi
+ 		goto out_disable;
+ 	}
+ 
++	/*
++	 * CPU capabilities initialization runs before FPU init. So
++	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
++	 * functional, set the feature bit so depending code works.
++	 */
++	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
++
+ 	print_xstate_offset_size();
+ 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
+ 		fpu_kernel_cfg.max_features,