From: Greg Kroah-Hartman Date: Sun, 27 Aug 2023 06:50:45 +0000 (+0200) Subject: 6.4-stable patches X-Git-Tag: v6.1.49~39 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5628542b8f8bb53a88b1e9d1a01709e591cde470;p=thirdparty%2Fkernel%2Fstable-queue.git 6.4-stable patches added patches: drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch drm-i915-dgfx-enable-d3cold-at-s2idle.patch drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch drm-panfrost-skip-speed-binning-on-eopnotsupp.patch drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch drm-vmwgfx-fix-shader-stage-validation.patch loongarch-fix-hw_breakpoint_control-for-watchpoints.patch x86-fpu-invalidate-fpu-state-correctly-on-exec.patch x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch --- diff --git a/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch b/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch new file mode 100644 index 00000000000..f5d10ccb72c --- /dev/null +++ b/queue-6.4/drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch @@ -0,0 +1,144 @@ +From a94e7ccfc400c024976f3c2f31689ed843498b7c Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Tue, 22 Aug 2023 14:30:14 +0300 +Subject: drm: Add an HPD poll helper to reschedule the poll work +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit a94e7ccfc400c024976f3c2f31689ed843498b7c upstream. + +Add a helper to reschedule drm_mode_config::output_poll_work after +polling has been enabled for a connector (and needing a reschedule, +since previously polling was disabled for all connectors and hence +output_poll_work was not running). + +This is needed by the next patch fixing HPD polling on i915. 
+ +CC: stable@vger.kernel.org # 6.4+ +Cc: Dmitry Baryshkov +Cc: dri-devel@lists.freedesktop.org +Reviewed-by: Jouni Högander +Reviewed-by: Dmitry Baryshkov +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-1-imre.deak@intel.com +(cherry picked from commit fe2352fd64029918174de4b460dfe6df0c6911cd) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/drm_probe_helper.c | 68 ++++++++++++++++++++---------- + include/drm/drm_probe_helper.h | 1 + + 2 files changed, 47 insertions(+), 22 deletions(-) + +diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c +index 2fb9bf901a2c..3f479483d7d8 100644 +--- a/drivers/gpu/drm/drm_probe_helper.c ++++ b/drivers/gpu/drm/drm_probe_helper.c +@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev) + } + + #define DRM_OUTPUT_POLL_PERIOD (10*HZ) ++static void reschedule_output_poll_work(struct drm_device *dev) ++{ ++ unsigned long delay = DRM_OUTPUT_POLL_PERIOD; ++ ++ if (dev->mode_config.delayed_event) ++ /* ++ * FIXME: ++ * ++ * Use short (1s) delay to handle the initial delayed event. ++ * This delay should not be needed, but Optimus/nouveau will ++ * fail in a mysterious way if the delayed event is handled as ++ * soon as possible like it is done in ++ * drm_helper_probe_single_connector_modes() in case the poll ++ * was enabled before. ++ */ ++ delay = HZ; ++ ++ schedule_delayed_work(&dev->mode_config.output_poll_work, delay); ++} ++ + /** + * drm_kms_helper_poll_enable - re-enable output polling. 
+ * @dev: drm_device +@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev) + */ + void drm_kms_helper_poll_enable(struct drm_device *dev) + { +- bool poll = false; +- unsigned long delay = DRM_OUTPUT_POLL_PERIOD; +- + if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll || + dev->mode_config.poll_running) + return; + +- poll = drm_kms_helper_enable_hpd(dev); +- +- if (dev->mode_config.delayed_event) { +- /* +- * FIXME: +- * +- * Use short (1s) delay to handle the initial delayed event. +- * This delay should not be needed, but Optimus/nouveau will +- * fail in a mysterious way if the delayed event is handled as +- * soon as possible like it is done in +- * drm_helper_probe_single_connector_modes() in case the poll +- * was enabled before. +- */ +- poll = true; +- delay = HZ; +- } +- +- if (poll) +- schedule_delayed_work(&dev->mode_config.output_poll_work, delay); ++ if (drm_kms_helper_enable_hpd(dev) || ++ dev->mode_config.delayed_event) ++ reschedule_output_poll_work(dev); + + dev->mode_config.poll_running = true; + } + EXPORT_SYMBOL(drm_kms_helper_poll_enable); + ++/** ++ * drm_kms_helper_poll_reschedule - reschedule the output polling work ++ * @dev: drm_device ++ * ++ * This function reschedules the output polling work, after polling for a ++ * connector has been enabled. ++ * ++ * Drivers must call this helper after enabling polling for a connector by ++ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags ++ * in drm_connector::polled. Note that after disabling polling by clearing these ++ * flags for a connector will stop the output polling work automatically if ++ * the polling is disabled for all other connectors as well. ++ * ++ * The function can be called only after polling has been enabled by calling ++ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable(). 
++ */ ++void drm_kms_helper_poll_reschedule(struct drm_device *dev) ++{ ++ if (dev->mode_config.poll_running) ++ reschedule_output_poll_work(dev); ++} ++EXPORT_SYMBOL(drm_kms_helper_poll_reschedule); ++ + static enum drm_connector_status + drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force) + { +diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h +index 4977e0ab72db..fad3c4003b2b 100644 +--- a/include/drm/drm_probe_helper.h ++++ b/include/drm/drm_probe_helper.h +@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector); + + void drm_kms_helper_poll_disable(struct drm_device *dev); + void drm_kms_helper_poll_enable(struct drm_device *dev); ++void drm_kms_helper_poll_reschedule(struct drm_device *dev); + bool drm_kms_helper_is_poll_worker(void); + + enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc, +-- +2.42.0 + diff --git a/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch b/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch new file mode 100644 index 00000000000..d1d8be7d325 --- /dev/null +++ b/queue-6.4/drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch @@ -0,0 +1,37 @@ +From 5ad1ab30ac0809d2963ddcf39ac34317a24a2f17 Mon Sep 17 00:00:00 2001 +From: Ankit Nautiyal +Date: Fri, 18 Aug 2023 10:14:36 +0530 +Subject: drm/display/dp: Fix the DP DSC Receiver cap size + +From: Ankit Nautiyal + +commit 5ad1ab30ac0809d2963ddcf39ac34317a24a2f17 upstream. + +DP DSC Receiver Capabilities are exposed via DPCD 60h-6Fh. +Fix the DSC RECEIVER CAP SIZE accordingly. 
+ +Fixes: ffddc4363c28 ("drm/dp: Add DP DSC DPCD receiver capability size define and missing SHIFT") +Cc: Anusha Srivatsa +Cc: Manasi Navare +Cc: # v5.0+ + +Signed-off-by: Ankit Nautiyal +Reviewed-by: Stanislav Lisovskiy +Signed-off-by: Jani Nikula +Link: https://patchwork.freedesktop.org/patch/msgid/20230818044436.177806-1-ankit.k.nautiyal@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + include/drm/display/drm_dp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/drm/display/drm_dp.h ++++ b/include/drm/display/drm_dp.h +@@ -1534,7 +1534,7 @@ enum drm_dp_phy { + + #define DP_BRANCH_OUI_HEADER_SIZE 0xc + #define DP_RECEIVER_CAP_SIZE 0xf +-#define DP_DSC_RECEIVER_CAP_SIZE 0xf ++#define DP_DSC_RECEIVER_CAP_SIZE 0x10 /* DSC Capabilities 0x60 through 0x6F */ + #define EDP_PSR_RECEIVER_CAP_SIZE 2 + #define EDP_DISPLAY_CTL_CAP_SIZE 3 + #define DP_LTTPR_COMMON_CAP_SIZE 8 diff --git a/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch b/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch new file mode 100644 index 00000000000..4375f8b6495 --- /dev/null +++ b/queue-6.4/drm-i915-dgfx-enable-d3cold-at-s2idle.patch @@ -0,0 +1,127 @@ +From 2872144aec04baa7e43ecd2a60f7f0be3aa843fd Mon Sep 17 00:00:00 2001 +From: Anshuman Gupta +Date: Wed, 16 Aug 2023 18:22:16 +0530 +Subject: drm/i915/dgfx: Enable d3cold at s2idle + +From: Anshuman Gupta + +commit 2872144aec04baa7e43ecd2a60f7f0be3aa843fd upstream. + +System wide suspend already has support for lmem save/restore during +suspend therefore enabling d3cold for s2idle and keeping it disabled for +runtime PM. (Refer below commit for d3cold runtime PM disable justification) +'commit 66eb93e71a7a ("drm/i915/dgfx: Keep PCI autosuspend control +'on' by default on all dGPU")' + +It will reduce the DG2 Card power consumption to ~0 Watt +for s2idle power KPI. + +v2: +- Added "Cc: stable@vger.kernel.org". 
+ +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8755 +Cc: stable@vger.kernel.org +Cc: Rodrigo Vivi +Signed-off-by: Anshuman Gupta +Reviewed-by: Rodrigo Vivi +Tested-by: Aaron Ma +Tested-by: Jianshui Yu +Link: https://patchwork.freedesktop.org/patch/msgid/20230816125216.1722002-1-anshuman.gupta@intel.com +(cherry picked from commit 2643e6d1f2a5e51877be24042d53cf956589be10) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/i915_driver.c | 33 ++++++++++++++++++--------------- + 1 file changed, 18 insertions(+), 15 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_driver.c ++++ b/drivers/gpu/drm/i915/i915_driver.c +@@ -433,7 +433,6 @@ static int i915_pcode_init(struct drm_i9 + static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) + { + struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); +- struct pci_dev *root_pdev; + int ret; + + if (i915_inject_probe_failure(dev_priv)) +@@ -547,15 +546,6 @@ static int i915_driver_hw_probe(struct d + + intel_bw_init_hw(dev_priv); + +- /* +- * FIXME: Temporary hammer to avoid freezing the machine on our DGFX +- * This should be totally removed when we handle the pci states properly +- * on runtime PM and on s2idle cases. 
+- */ +- root_pdev = pcie_find_root_port(pdev); +- if (root_pdev) +- pci_d3cold_disable(root_pdev); +- + return 0; + + err_opregion: +@@ -581,7 +571,6 @@ err_perf: + static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) + { + struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); +- struct pci_dev *root_pdev; + + i915_perf_fini(dev_priv); + +@@ -589,10 +578,6 @@ static void i915_driver_hw_remove(struct + + if (pdev->msi_enabled) + pci_disable_msi(pdev); +- +- root_pdev = pcie_find_root_port(pdev); +- if (root_pdev) +- pci_d3cold_enable(root_pdev); + } + + /** +@@ -1499,6 +1484,8 @@ static int intel_runtime_suspend(struct + { + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; ++ struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); ++ struct pci_dev *root_pdev; + struct intel_gt *gt; + int ret, i; + +@@ -1550,6 +1537,15 @@ static int intel_runtime_suspend(struct + drm_err(&dev_priv->drm, + "Unclaimed access detected prior to suspending\n"); + ++ /* ++ * FIXME: Temporary hammer to avoid freezing the machine on our DGFX ++ * This should be totally removed when we handle the pci states properly ++ * on runtime PM. 
++ */ ++ root_pdev = pcie_find_root_port(pdev); ++ if (root_pdev) ++ pci_d3cold_disable(root_pdev); ++ + rpm->suspended = true; + + /* +@@ -1588,6 +1584,8 @@ static int intel_runtime_resume(struct d + { + struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; ++ struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); ++ struct pci_dev *root_pdev; + struct intel_gt *gt; + int ret, i; + +@@ -1601,6 +1599,11 @@ static int intel_runtime_resume(struct d + + intel_opregion_notify_adapter(dev_priv, PCI_D0); + rpm->suspended = false; ++ ++ root_pdev = pcie_find_root_port(pdev); ++ if (root_pdev) ++ pci_d3cold_enable(root_pdev); ++ + if (intel_uncore_unclaimed_mmio(&dev_priv->uncore)) + drm_dbg(&dev_priv->drm, + "Unclaimed access during suspend, bios?\n"); diff --git a/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch b/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch new file mode 100644 index 00000000000..b924ad3a40e --- /dev/null +++ b/queue-6.4/drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch @@ -0,0 +1,59 @@ +From 1dcc437427bbcebc8381226352f7ade08a271191 Mon Sep 17 00:00:00 2001 +From: Imre Deak +Date: Tue, 22 Aug 2023 14:30:15 +0300 +Subject: drm/i915: Fix HPD polling, reenabling the output poll work as needed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Imre Deak + +commit 1dcc437427bbcebc8381226352f7ade08a271191 upstream. + +After the commit in the Fixes: line below, HPD polling stopped working +on i915, since after that change calling drm_kms_helper_poll_enable() +doesn't restart drm_mode_config::output_poll_work if the work was +stopped (no connectors needing polling) and enabling polling for a +connector (during runtime suspend or detecting an HPD IRQ storm). 
+ +After the above change calling drm_kms_helper_poll_enable() is a nop +after it's been called already and polling for some connectors was +disabled/re-enabled. + +Fix this by calling drm_kms_helper_poll_reschedule() added in the +previous patch instead, which reschedules the work whenever expected. + +Fixes: d33a54e3991d ("drm/probe_helper: sort out poll_running vs poll_enabled") +CC: stable@vger.kernel.org # 6.4+ +Cc: Dmitry Baryshkov +Cc: dri-devel@lists.freedesktop.org +Reviewed-by: Jouni Högander +Signed-off-by: Imre Deak +Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-2-imre.deak@intel.com +(cherry picked from commit 50452f2f76852322620b63e62922b85e955abe94) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_hotplug.c ++++ b/drivers/gpu/drm/i915/display/intel_hotplug.c +@@ -210,7 +210,7 @@ intel_hpd_irq_storm_switch_to_polling(st + + /* Enable polling and queue hotplug re-enabling. 
*/ + if (hpd_disabled) { +- drm_kms_helper_poll_enable(&dev_priv->drm); ++ drm_kms_helper_poll_reschedule(&dev_priv->drm); + mod_delayed_work(system_wq, &dev_priv->display.hotplug.reenable_work, + msecs_to_jiffies(HPD_STORM_REENABLE_DELAY)); + } +@@ -644,7 +644,7 @@ static void i915_hpd_poll_init_work(stru + drm_connector_list_iter_end(&conn_iter); + + if (enabled) +- drm_kms_helper_poll_enable(&dev_priv->drm); ++ drm_kms_helper_poll_reschedule(&dev_priv->drm); + + mutex_unlock(&dev_priv->drm.mode_config.mutex); + diff --git a/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch b/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch new file mode 100644 index 00000000000..aa2eba2d02c --- /dev/null +++ b/queue-6.4/drm-panfrost-skip-speed-binning-on-eopnotsupp.patch @@ -0,0 +1,40 @@ +From f19df6e4de64b7fc6d71f192aa9ff3b701e4bade Mon Sep 17 00:00:00 2001 +From: David Michael +Date: Tue, 15 Aug 2023 21:42:41 -0400 +Subject: drm/panfrost: Skip speed binning on EOPNOTSUPP + +From: David Michael + +commit f19df6e4de64b7fc6d71f192aa9ff3b701e4bade upstream. + +Encountered on an ARM Mali-T760 MP4, attempting to read the nvmem +variable can also return EOPNOTSUPP instead of ENOENT when speed +binning is unsupported. 
+ +Cc: +Fixes: 7d690f936e9b ("drm/panfrost: Add basic support for speed binning") +Signed-off-by: David Michael +Reviewed-by: Steven Price +Signed-off-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/87msyryd7y.fsf@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/panfrost/panfrost_devfreq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c +index 58dfb15a8757..e78de99e9933 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c ++++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c +@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev) + * keep going without it; any other error means that we are + * supposed to read the bin value, but we failed doing so. + */ +- if (ret != -ENOENT) { ++ if (ret != -ENOENT && ret != -EOPNOTSUPP) { + DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret); + return ret; + } +-- +2.42.0 + diff --git a/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch b/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch new file mode 100644 index 00000000000..41207127357 --- /dev/null +++ b/queue-6.4/drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch @@ -0,0 +1,137 @@ +From f9e96bf1905479f18e83a3a4c314a8dfa56ede2c Mon Sep 17 00:00:00 2001 +From: Zack Rusin +Date: Fri, 18 Aug 2023 00:13:01 -0400 +Subject: drm/vmwgfx: Fix possible invalid drm gem put calls + +From: Zack Rusin + +commit f9e96bf1905479f18e83a3a4c314a8dfa56ede2c upstream. + +vmw_bo_unreference sets the input buffer to null on exit, resulting in +null ptr deref's on the subsequent drm gem put calls. + +This went unnoticed because only very old userspace would be exercising +those paths but it wouldn't be hard to hit on old distros with brand +new kernels. + +Introduce a new function that abstracts unrefing of user bo's to make +the code cleaner and more explicit. 
+ +Signed-off-by: Zack Rusin +Reported-by: Ian Forbes +Fixes: 9ef8d83e8e25 ("drm/vmwgfx: Do not drop the reference to the handle too soon") +Cc: # v6.4+ +Reviewed-by: Maaz Mombasawala +Link: https://patchwork.freedesktop.org/patch/msgid/20230818041301.407636-1-zack@kde.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 6 ++---- + drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 8 ++++++++ + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 ++---- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 ++---- + drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 3 +-- + drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 3 +-- + 6 files changed, 16 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(s + if (!(flags & drm_vmw_synccpu_allow_cs)) { + atomic_dec(&vmw_bo->cpu_writers); + } +- ttm_bo_put(&vmw_bo->tbo); ++ vmw_user_bo_unref(vmw_bo); + } + +- drm_gem_object_put(&vmw_bo->tbo.base); + return ret; + } + +@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm + return ret; + + ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); +- vmw_bo_unreference(&vbo); +- drm_gem_object_put(&vbo->tbo.base); ++ vmw_user_bo_unref(vbo); + if (unlikely(ret != 0)) { + if (ret == -ERESTARTSYS || ret == -EBUSY) + return -EBUSY; +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_refe + return buf; + } + ++static inline void vmw_user_bo_unref(struct vmw_bo *vbo) ++{ ++ if (vbo) { ++ ttm_bo_put(&vbo->tbo); ++ drm_gem_object_put(&vbo->tbo.base); ++ } ++} ++ + static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj) + { + return container_of((gobj), struct vmw_bo, tbo.base); +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct + } + vmw_bo_placement_set(vmw_bo, 
VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); + ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); +- ttm_bo_put(&vmw_bo->tbo); +- drm_gem_object_put(&vmw_bo->tbo.base); ++ vmw_user_bo_unref(vmw_bo); + if (unlikely(ret != 0)) + return ret; + +@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struc + vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, + VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM); + ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); +- ttm_bo_put(&vmw_bo->tbo); +- drm_gem_object_put(&vmw_bo->tbo.base); ++ vmw_user_bo_unref(vmw_bo); + if (unlikely(ret != 0)) + return ret; + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_f + + err_out: + /* vmw_user_lookup_handle takes one ref so does new_fb */ +- if (bo) { +- vmw_bo_unreference(&bo); +- drm_gem_object_put(&bo->tbo.base); +- } ++ if (bo) ++ vmw_user_bo_unref(bo); + if (surface) + vmw_surface_unreference(&surface); + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device + + ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); + +- vmw_bo_unreference(&buf); +- drm_gem_object_put(&buf->tbo.base); ++ vmw_user_bo_unref(buf); + + out_unlock: + mutex_unlock(&overlay->mutex); +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_ + shader_type, num_input_sig, + num_output_sig, tfile, shader_handle); + out_bad_arg: +- vmw_bo_unreference(&buffer); +- drm_gem_object_put(&buffer->tbo.base); ++ vmw_user_bo_unref(buffer); + return ret; + } + diff --git a/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch b/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch new file mode 100644 index 00000000000..0c6e9b0511b --- /dev/null +++ b/queue-6.4/drm-vmwgfx-fix-shader-stage-validation.patch @@ 
-0,0 +1,175 @@ +From 14abdfae508228a7307f7491b5c4215ae70c6542 Mon Sep 17 00:00:00 2001 +From: Zack Rusin +Date: Fri, 16 Jun 2023 15:09:34 -0400 +Subject: drm/vmwgfx: Fix shader stage validation + +From: Zack Rusin + +commit 14abdfae508228a7307f7491b5c4215ae70c6542 upstream. + +For multiple commands the driver was not correctly validating the shader +stages resulting in possible kernel oopses. The validation code was only. +if ever, checking the upper bound on the shader stages but never a lower +bound (valid shader stages start at 1 not 0). + +Fixes kernel oopses ending up in vmw_binding_add, e.g.: +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 1 PID: 2443 Comm: testcase Not tainted 6.3.0-rc4-vmwgfx #1 +Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 +RIP: 0010:vmw_binding_add+0x4c/0x140 [vmwgfx] +Code: 7e 30 49 83 ff 0e 0f 87 ea 00 00 00 4b 8d 04 7f 89 d2 89 cb 48 c1 e0 03 4c 8b b0 40 3d 93 c0 48 8b 80 48 3d 93 c0 49 0f af de <48> 03 1c d0 4c 01 e3 49 8> +RSP: 0018:ffffb8014416b968 EFLAGS: 00010206 +RAX: ffffffffc0933ec0 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: 00000000ffffffff RSI: ffffb8014416b9c0 RDI: ffffb8014316f000 +RBP: ffffb8014416b998 R08: 0000000000000003 R09: 746f6c735f726564 +R10: ffffffffaaf2bda0 R11: 732e676e69646e69 R12: ffffb8014316f000 +R13: ffffb8014416b9c0 R14: 0000000000000040 R15: 0000000000000006 +FS: 00007fba8c0af740(0000) GS:ffff8a1277c80000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000007c0933eb8 CR3: 0000000118244001 CR4: 00000000003706e0 +Call Trace: + + vmw_view_bindings_add+0xf5/0x1b0 [vmwgfx] + ? ___drm_dbg+0x8a/0xb0 [drm] + vmw_cmd_dx_set_shader_res+0x8f/0xc0 [vmwgfx] + vmw_execbuf_process+0x590/0x1360 [vmwgfx] + vmw_execbuf_ioctl+0x173/0x370 [vmwgfx] + ? __drm_dev_dbg+0xb4/0xe0 [drm] + ? __pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx] + drm_ioctl_kernel+0xbc/0x160 [drm] + drm_ioctl+0x2d2/0x580 [drm] + ? 
__pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx] + ? do_fault+0x1a6/0x420 + vmw_generic_ioctl+0xbd/0x180 [vmwgfx] + vmw_unlocked_ioctl+0x19/0x20 [vmwgfx] + __x64_sys_ioctl+0x96/0xd0 + do_syscall_64+0x5d/0x90 + ? handle_mm_fault+0xe4/0x2f0 + ? debug_smp_processor_id+0x1b/0x30 + ? fpregs_assert_state_consistent+0x2e/0x50 + ? exit_to_user_mode_prepare+0x40/0x180 + ? irqentry_exit_to_user_mode+0xd/0x20 + ? irqentry_exit+0x3f/0x50 + ? exc_page_fault+0x8b/0x180 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +Signed-off-by: Zack Rusin +Cc: security@openanolis.org +Reported-by: Ziming Zhang +Testcase-found-by: Niels De Graef +Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support") +Cc: # v4.3+ +Reviewed-by: Maaz Mombasawala +Reviewed-by: Martin Krastev +Link: https://patchwork.freedesktop.org/patch/msgid/20230616190934.54828-1-zack@kde.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 12 ++++++++++++ + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 29 +++++++++++------------------ + 2 files changed, 23 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct + return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0; + } + ++static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model, ++ u32 shader_type) ++{ ++ SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX; ++ ++ if (shader_model >= VMW_SM_5) ++ max_allowed = SVGA3D_SHADERTYPE_MAX; ++ else if (shader_model >= VMW_SM_4) ++ max_allowed = SVGA3D_SHADERTYPE_DX10_MAX; ++ return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed; ++} ++ + #endif +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +@@ -1992,7 +1992,7 @@ static int vmw_cmd_set_shader(struct vmw + + cmd = container_of(header, typeof(*cmd), header); + +- if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) { ++ if 
(!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) { + VMW_DEBUG_USER("Illegal shader type %u.\n", + (unsigned int) cmd->body.type); + return -EINVAL; +@@ -2115,8 +2115,6 @@ vmw_cmd_dx_set_single_constant_buffer(st + SVGA3dCmdHeader *header) + { + VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer); +- SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ? +- SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10; + + struct vmw_resource *res = NULL; + struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context); +@@ -2133,6 +2131,14 @@ vmw_cmd_dx_set_single_constant_buffer(st + if (unlikely(ret != 0)) + return ret; + ++ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) || ++ cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) { ++ VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n", ++ (unsigned int) cmd->body.type, ++ (unsigned int) cmd->body.slot); ++ return -EINVAL; ++ } ++ + binding.bi.ctx = ctx_node->ctx; + binding.bi.res = res; + binding.bi.bt = vmw_ctx_binding_cb; +@@ -2141,14 +2147,6 @@ vmw_cmd_dx_set_single_constant_buffer(st + binding.size = cmd->body.sizeInBytes; + binding.slot = cmd->body.slot; + +- if (binding.shader_slot >= max_shader_num || +- binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) { +- VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n", +- (unsigned int) cmd->body.type, +- (unsigned int) binding.slot); +- return -EINVAL; +- } +- + vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot, + binding.slot); + +@@ -2207,15 +2205,13 @@ static int vmw_cmd_dx_set_shader_res(str + { + VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) = + container_of(header, typeof(*cmd), header); +- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ? 
+- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX; + + u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) / + sizeof(SVGA3dShaderResourceViewId); + + if ((u64) cmd->body.startView + (u64) num_sr_view > + (u64) SVGA3D_DX_MAX_SRVIEWS || +- cmd->body.type >= max_allowed) { ++ !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) { + VMW_DEBUG_USER("Invalid shader binding.\n"); + return -EINVAL; + } +@@ -2239,8 +2235,6 @@ static int vmw_cmd_dx_set_shader(struct + SVGA3dCmdHeader *header) + { + VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader); +- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ? +- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX; + struct vmw_resource *res = NULL; + struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context); + struct vmw_ctx_bindinfo_shader binding; +@@ -2251,8 +2245,7 @@ static int vmw_cmd_dx_set_shader(struct + + cmd = container_of(header, typeof(*cmd), header); + +- if (cmd->body.type >= max_allowed || +- cmd->body.type < SVGA3D_SHADERTYPE_MIN) { ++ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) { + VMW_DEBUG_USER("Illegal shader type %u.\n", + (unsigned int) cmd->body.type); + return -EINVAL; diff --git a/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch b/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch new file mode 100644 index 00000000000..d02d1225551 --- /dev/null +++ b/queue-6.4/loongarch-fix-hw_breakpoint_control-for-watchpoints.patch @@ -0,0 +1,34 @@ +From 9730870b484e9de852b51df08a8b357b1129489e Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Sat, 26 Aug 2023 22:21:57 +0800 +Subject: LoongArch: Fix hw_breakpoint_control() for watchpoints + +From: Huacai Chen + +commit 9730870b484e9de852b51df08a8b357b1129489e upstream. + +In hw_breakpoint_control(), encode_ctrl_reg() has already encoded the +MWPnCFG3_LoadEn/MWPnCFG3_StoreEn bits in info->ctrl. We don't need to +add (1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn) unconditionally. 
+ +Otherwise we can't set read watchpoint and write watchpoint separately. + +Cc: stable@vger.kernel.org +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kernel/hw_breakpoint.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/loongarch/kernel/hw_breakpoint.c ++++ b/arch/loongarch/kernel/hw_breakpoint.c +@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct + write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE); + } else { + ctrl = encode_ctrl_reg(info->ctrl); +- write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE | +- 1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn); ++ write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE); + } + enable = csr_read64(LOONGARCH_CSR_CRMD); + csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD); diff --git a/queue-6.4/series b/queue-6.4/series index 11f5bb2ff2e..97867061793 100644 --- a/queue-6.4/series +++ b/queue-6.4/series @@ -64,6 +64,16 @@ selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch +drm-vmwgfx-fix-shader-stage-validation.patch +drm-vmwgfx-fix-possible-invalid-drm-gem-put-calls.patch +drm-add-an-hpd-poll-helper-to-reschedule-the-poll-work.patch +drm-panfrost-skip-speed-binning-on-eopnotsupp.patch +drm-i915-dgfx-enable-d3cold-at-s2idle.patch +drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch +drm-i915-fix-hpd-polling-reenabling-the-output-poll-work-as-needed.patch +loongarch-fix-hw_breakpoint_control-for-watchpoints.patch +x86-fpu-invalidate-fpu-state-correctly-on-exec.patch +x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch mm-multi-gen-lru-don-t-spin-during-memcg-release.patch diff --git 
a/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch b/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch new file mode 100644 index 00000000000..5cf755333c8 --- /dev/null +++ b/queue-6.4/x86-fpu-invalidate-fpu-state-correctly-on-exec.patch @@ -0,0 +1,135 @@ +From 1f69383b203e28cf8a4ca9570e572da1699f76cd Mon Sep 17 00:00:00 2001 +From: Rick Edgecombe +Date: Fri, 18 Aug 2023 10:03:05 -0700 +Subject: x86/fpu: Invalidate FPU state correctly on exec() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rick Edgecombe + +commit 1f69383b203e28cf8a4ca9570e572da1699f76cd upstream. + +The thread flag TIF_NEED_FPU_LOAD indicates that the FPU saved state is +valid and should be reloaded when returning to userspace. However, the +kernel will skip doing this if the FPU registers are already valid as +determined by fpregs_state_valid(). The logic embedded there considers +the state valid if two cases are both true: + + 1: fpu_fpregs_owner_ctx points to the current tasks FPU state + 2: the last CPU the registers were live in was the current CPU. + +This is usually correct logic. A CPU’s fpu_fpregs_owner_ctx is set to +the current FPU during the fpregs_restore_userregs() operation, so it +indicates that the registers have been restored on this CPU. But this +alone doesn’t preclude that the task hasn’t been rescheduled to a +different CPU, where the registers were modified, and then back to the +current CPU. To verify that this was not the case the logic relies on the +second condition. So the assumption is that if the registers have been +restored, AND they haven’t had the chance to be modified (by being +loaded on another CPU), then they MUST be valid on the current CPU. + +Besides the lazy FPU optimizations, the other cases where the FPU +registers might not be valid are when the kernel modifies the FPU register +state or the FPU saved buffer. 
In this case the operation modifying the +FPU state needs to let the kernel know the correspondence has been +broken. The comment in “arch/x86/kernel/fpu/context.h” has: +/* +... + * If the FPU register state is valid, the kernel can skip restoring the + * FPU state from memory. + * + * Any code that clobbers the FPU registers or updates the in-memory + * FPU state for a task MUST let the rest of the kernel know that the + * FPU registers are no longer valid for this task. + * + * Either one of these invalidation functions is enough. Invalidate + * a resource you control: CPU if using the CPU for something else + * (with preemption disabled), FPU for the current task, or a task that + * is prevented from running by the current task. + */ + +However, this is not completely true. When the kernel modifies the +registers or saved FPU state, it can only rely on +__fpu_invalidate_fpregs_state(), which wipes the FPU’s last_cpu +tracking. The exec path instead relies on fpregs_deactivate(), which sets +the CPU’s FPU context to NULL. This was observed to fail to restore the +reset FPU state to the registers when returning to userspace in the +following scenario: + +1. A task is executing in userspace on CPU0 + - CPU0’s FPU context points to the task’s + - fpu->last_cpu=CPU0 + +2. The task exec()’s + +3. While in the kernel the task is preempted + - CPU0 gets a thread executing in the kernel (such that no other + FPU context is activated) + - Scheduler sets task’s fpu->last_cpu=CPU0 when scheduling out + +4. Task is migrated to CPU1 + +5. Continuing the exec(), the task gets to + fpu_flush_thread()->fpu_reset_fpregs() + - Sets CPU1’s fpu context to NULL + - Copies the init state to the task’s FPU buffer + - Sets TIF_NEED_FPU_LOAD on the task + +6.
The task reschedules back to CPU0 before completing the exec() and + returning to userspace + - During the reschedule, scheduler finds TIF_NEED_FPU_LOAD is set + - Skips saving the registers and updating task’s fpu->last_cpu, + because TIF_NEED_FPU_LOAD is the canonical source. + +7. Now CPU0’s FPU context is still pointing to the task’s, and + fpu->last_cpu is still CPU0. So fpregs_state_valid() returns true even + though the reset FPU state has not been restored. + +So the root cause is that exec() is doing the wrong kind of invalidate. It +should reset fpu->last_cpu via __fpu_invalidate_fpregs_state(). Further, +fpu__drop() doesn't really seem appropriate as the task (and FPU) are not +going away, they are just getting reset as part of an exec. So switch to +__fpu_invalidate_fpregs_state(). + +Also, delete the misleading comment that says that either kind of +invalidate will be enough, because it’s not always the case. + +Fixes: 33344368cb08 ("x86/fpu: Clean up the fpu__clear() variants") +Reported-by: Lei Wang +Signed-off-by: Rick Edgecombe +Signed-off-by: Thomas Gleixner +Tested-by: Lijun Pan +Reviewed-by: Sohil Mehta +Acked-by: Lijun Pan +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230818170305.502891-1-rick.p.edgecombe@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/context.h | 3 +-- + arch/x86/kernel/fpu/core.c | 2 +- + 2 files changed, 2 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/fpu/context.h ++++ b/arch/x86/kernel/fpu/context.h +@@ -19,8 +19,7 @@ + * FPU state for a task MUST let the rest of the kernel know that the + * FPU registers are no longer valid for this task. + * +- * Either one of these invalidation functions is enough.
Invalidate +- * a resource you control: CPU if using the CPU for something else ++ * Invalidate a resource you control: CPU if using the CPU for something else + * (with preemption disabled), FPU for the current task, or a task that + * is prevented from running by the current task. + */ +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void) + struct fpu *fpu = &current->thread.fpu; + + fpregs_lock(); +- fpu__drop(fpu); ++ __fpu_invalidate_fpregs_state(fpu); + /* + * This does not change the actual hardware registers. It just + * resets the memory image and sets TIF_NEED_FPU_LOAD so a diff --git a/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch b/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch new file mode 100644 index 00000000000..cabd006c62f --- /dev/null +++ b/queue-6.4/x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch @@ -0,0 +1,61 @@ +From 2c66ca3949dc701da7f4c9407f2140ae425683a5 Mon Sep 17 00:00:00 2001 +From: Feng Tang +Date: Wed, 23 Aug 2023 14:57:47 +0800 +Subject: x86/fpu: Set X86_FEATURE_OSXSAVE feature after enabling OSXSAVE in CR4 + +From: Feng Tang + +commit 2c66ca3949dc701da7f4c9407f2140ae425683a5 upstream. + +0-Day found a 34.6% regression in stress-ng's 'af-alg' test case, and +bisected it to commit b81fac906a8f ("x86/fpu: Move FPU initialization into +arch_cpu_finalize_init()"), which optimizes the FPU init order, and moves +the CR4_OSXSAVE enabling into a later place: + + arch_cpu_finalize_init + identify_boot_cpu + identify_cpu + generic_identify + get_cpu_cap --> setup cpu capability + ... + fpu__init_cpu + fpu__init_cpu_xstate + cr4_set_bits(X86_CR4_OSXSAVE); + +As the FPU is not yet initialized the CPU capability setup fails to set +X86_FEATURE_OSXSAVE.
Many security modules like 'camellia_aesni_avx_x86_64' +depend on this feature and therefore fail to load, causing the regression. + +Cure this by setting X86_FEATURE_OSXSAVE feature right after OSXSAVE +enabling. + +[ tglx: Moved it into the actual BSP FPU initialization code and added a comment ] + +Fixes: b81fac906a8f ("x86/fpu: Move FPU initialization into arch_cpu_finalize_init()") +Reported-by: kernel test robot +Signed-off-by: Feng Tang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/lkml/202307192135.203ac24e-oliver.sang@intel.com +Link: https://lore.kernel.org/lkml/20230823065747.92257-1-feng.tang@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/xstate.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsi + goto out_disable; + } + ++ /* ++ * CPU capabilities initialization runs before FPU init. So ++ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely ++ * functional, set the feature bit so depending code works. ++ */ ++ setup_force_cpu_cap(X86_FEATURE_OSXSAVE); ++ + print_xstate_offset_size(); + pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", + fpu_kernel_cfg.max_features,