From: Greg Kroah-Hartman Date: Mon, 20 Jan 2025 14:45:23 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.6.73~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f7859851c0f8c7e9a43d832aec51fcf13df21713;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: drm-amd-display-disable-replay-and-psr-while-vrr-is-enabled.patch drm-amd-display-do-not-elevate-mem_type-change-to-full-update.patch drm-amd-display-do-not-wait-for-psr-disable-on-vbl-enable.patch drm-amd-display-fix-psr-su-not-support-but-still-call-the-amdgpu_dm_psr_enable.patch drm-amd-display-validate-mdoe-under-mst-lct-1-case-as-well.patch drm-amdgpu-always-sync-the-gfx-pipe-on-ctx-switch.patch drm-amdgpu-disable-gfxoff-with-the-compute-workload-on-gfx12.patch drm-amdgpu-fix-fw-attestation-for-mp0_14_0_-2-3.patch drm-amdgpu-smu13-update-powersave-optimizations.patch drm-i915-fb-relax-clear-color-alignment-to-64-bytes.patch drm-xe-mark-computecs-read-mode-as-uc-on-igpu.patch drm-xe-oa-add-missing-visactl-mux-registers.patch hrtimers-handle-cpu-state-correctly-on-hotplug.patch irqchip-gic-v3-handle-cpu_pm_enter_failed-correctly.patch irqchip-gic-v3-its-don-t-enable-interrupts-in-its_irq_set_vcpu_affinity.patch irqchip-plug-a-of-node-reference-leak-in-platform_irqchip_probe.patch mm-clear-uffd-wp-pte-pmd-state-on-mremap.patch mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch mm-vmscan-pgdemote-vmstat-is-not-getting-updated-when-mglru-is-enabled.patch pmdomain-imx8mp-blk-ctrl-add-missing-loop-break-condition.patch revert-drm-amd-display-enable-urgent-latency-adjustments-for-dcn35.patch selftests-mm-set-allocated-memory-to-non-zero-content-in-cow-test.patch timers-migration-enforce-group-initialization-visibility-to-tree-walkers.patch timers-migration-fix-another-race-between-hotplug-and-idle-entry-exit.patch tracing-gfp-fix-the-gfp-enum-values-shown-for-user-space-tracing-tools.patch x86-fred-fix-the-fred-rsp0-msr-out-of-sync-with-its-per-cpu-cache.patch --- diff --git a/queue-6.12/drm-amd-display-disable-replay-and-psr-while-vrr-is-enabled.patch b/queue-6.12/drm-amd-display-disable-replay-and-psr-while-vrr-is-enabled.patch new file mode 100644 index 0000000000..49873d9902 --- /dev/null +++ b/queue-6.12/drm-amd-display-disable-replay-and-psr-while-vrr-is-enabled.patch @@ -0,0 +1,82 @@ +From 67edb81d6e9af43a0d58edf74630f82cfda4155d Mon Sep 17 00:00:00 2001 +From: Tom Chung +Date: Thu, 5 Dec 2024 23:20:45 +0800 +Subject: drm/amd/display: Disable replay and psr while VRR is enabled + +From: Tom Chung + +commit 67edb81d6e9af43a0d58edf74630f82cfda4155d upstream. + +[Why] +Replay and PSR will cause some video corruption while VRR is enabled. + +[How] +1. Disable the Replay and PSR while VRR is enabled. +2. Change the amdgpu_dm_crtc_vrr_active() parameter to const. + Because the function will only read data from dm_crtc_state. 
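+
+For context, the gating added below reduces to this predicate, which
+is excerpted from the hunks that follow (surrounding code elided):
+
+    bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
+    {
+        /* VRR counts as active in both variable and fixed modes */
+        return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
+               dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
+    }
+
+Self-refresh entry is skipped while it returns true, and a commit with
+VRR active also tears down any active Replay/PSR state.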
+ +Reviewed-by: Sun peng Li +Signed-off-by: Tom Chung +Signed-off-by: Roman Li +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit d7879340e987b3056b8ae39db255b6c19c170a0d) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++-- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 +- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h | 2 +- + 3 files changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -8889,6 +8889,7 @@ static void amdgpu_dm_enable_self_refres + struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; + struct amdgpu_dm_connector *aconn = + (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; ++ bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); + + if (acrtc_state->update_type > UPDATE_TYPE_FAST) { + if (pr->config.replay_supported && !pr->replay_feature_enabled) +@@ -8915,7 +8916,8 @@ static void amdgpu_dm_enable_self_refres + * adequate number of fast atomic commits to notify KMD + * of update events. See `vblank_control_worker()`. + */ +- if (acrtc_attach->dm_irq_params.allow_sr_entry && ++ if (!vrr_active && ++ acrtc_attach->dm_irq_params.allow_sr_entry && + #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY + !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) && + #endif +@@ -9259,7 +9261,7 @@ static void amdgpu_dm_commit_planes(stru + bundle->stream_update.abm_level = &acrtc_state->abm_level; + + mutex_lock(&dm->dc_lock); +- if (acrtc_state->update_type > UPDATE_TYPE_FAST) { ++ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) { + if (acrtc_state->stream->link->replay_settings.replay_allow_active) + amdgpu_dm_replay_disable(acrtc_state->stream); + if (acrtc_state->stream->link->psr_settings.psr_allow_active) +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +@@ -93,7 +93,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struc + return rc; + } + +-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state) ++bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) + { + return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || + dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h +@@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struc + + bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc); + +-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state); ++bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state); + + int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc); + diff --git a/queue-6.12/drm-amd-display-do-not-elevate-mem_type-change-to-full-update.patch b/queue-6.12/drm-amd-display-do-not-elevate-mem_type-change-to-full-update.patch new file mode 100644 index 0000000000..c8a276eae9 --- /dev/null +++ b/queue-6.12/drm-amd-display-do-not-elevate-mem_type-change-to-full-update.patch @@ -0,0 +1,91 @@ +From 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0 Mon Sep 17 00:00:00 2001 +From: Leo Li +Date: Wed, 11 Dec 2024 12:06:24 -0500 +Subject: drm/amd/display: Do not elevate mem_type change to full update + +From: Leo Li + +commit 35ca53b7b0f0ffd16c6675fd76abac9409cf83e0 upstream. 
+ +[Why] + +There should not be any need to revalidate bandwidth on memory placement +change, since the fb is expected to be pinned to DCN-accessable memory +before scanout. For APU it's DRAM, and DGPU, it's VRAM. However, async +flips + memory type change needs to be rejected. + +[How] + +Do not set lock_and_validation_needed on mem_type change. Instead, +reject an async_flip request if the crtc's buffer(s) changed mem_type. + +This may fix stuttering/corruption experienced with PSR SU and PSR1 +panels, if the compositor allocates fbs in both VRAM carveout and GTT +and flips between them. + +Fixes: a7c0cad0dc06 ("drm/amd/display: ensure async flips are only accepted for fast updates") +Reviewed-by: Tom Chung +Signed-off-by: Leo Li +Signed-off-by: Tom Chung +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit 4caacd1671b7a013ad04cd8b6398f002540bdd4d) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++++++++++++++++----- + 1 file changed, 23 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -11370,6 +11370,25 @@ static int dm_crtc_get_cursor_mode(struc + return 0; + } + ++static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, ++ struct drm_atomic_state *state, ++ struct drm_crtc_state *crtc_state) ++{ ++ struct drm_plane *plane; ++ struct drm_plane_state *new_plane_state, *old_plane_state; ++ ++ drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { ++ new_plane_state = drm_atomic_get_plane_state(state, plane); ++ old_plane_state = drm_atomic_get_plane_state(state, plane); ++ ++ if (old_plane_state->fb && new_plane_state->fb && ++ get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb)) ++ return true; ++ } ++ ++ return false; ++} ++ + /** + * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. + * +@@ -11567,10 +11586,6 @@ static int amdgpu_dm_atomic_check(struct + + /* Remove exiting planes if they are modified */ + for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) { +- if (old_plane_state->fb && new_plane_state->fb && +- get_mem_type(old_plane_state->fb) != +- get_mem_type(new_plane_state->fb)) +- lock_and_validation_needed = true; + + ret = dm_update_plane_state(dc, state, plane, + old_plane_state, +@@ -11865,9 +11880,11 @@ static int amdgpu_dm_atomic_check(struct + + /* + * Only allow async flips for fast updates that don't change +- * the FB pitch, the DCC state, rotation, etc. ++ * the FB pitch, the DCC state, rotation, mem_type, etc. 
+ */ +- if (new_crtc_state->async_flip && lock_and_validation_needed) { ++ if (new_crtc_state->async_flip && ++ (lock_and_validation_needed || ++ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) { + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] async flips are only supported for fast updates\n", + crtc->base.id, crtc->name); diff --git a/queue-6.12/drm-amd-display-do-not-wait-for-psr-disable-on-vbl-enable.patch b/queue-6.12/drm-amd-display-do-not-wait-for-psr-disable-on-vbl-enable.patch new file mode 100644 index 0000000000..04a96ebefe --- /dev/null +++ b/queue-6.12/drm-amd-display-do-not-wait-for-psr-disable-on-vbl-enable.patch @@ -0,0 +1,215 @@ +From ff2e4d874726c549130308b6b46aa0f8a34e04cb Mon Sep 17 00:00:00 2001 +From: Leo Li +Date: Mon, 9 Dec 2024 12:58:33 -0500 +Subject: drm/amd/display: Do not wait for PSR disable on vbl enable + +From: Leo Li + +commit ff2e4d874726c549130308b6b46aa0f8a34e04cb upstream. + +[Why] + +Outside of a modeset/link configuration change, we should not have to +wait for the panel to exit PSR. Depending on the panel and it's state, +it may take multiple frames for it to exit PSR. Therefore, waiting in +all scenarios may cause perceived stuttering, especially in combination +with faster vblank shutdown. + +[How] + +PSR1 disable is hooked up to the vblank enable event, and vice versa. In +case of vblank enable, do not wait for panel to exit PSR, but still wait +in all other cases. + +We also avoid a call to unnecessarily change power_opts on disable - +this ends up sending another command to dmcub fw. + +When testing against IGT, some crc tests like kms_plane_alpha_blend and +amd_hotplug were failing due to CRC timeouts. This was found to be +caused by the early return before HW has fully exited PSR1. Fix this by +first making sure we grab a vblank reference, then waiting for panel to +exit PSR1, before programming hw for CRC generation. 
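+
+The resulting order of operations in the CRC path is roughly the
+following (sketch assembled from the hunks below; locking and error
+paths elided, arguments abbreviated):
+
+    /* 1. hold a vblank reference for the duration of CRC capture */
+    ret = drm_crtc_vblank_get(crtc);
+    if (ret)
+        goto cleanup;
+
+    /* 2. for PSR1, wait (up to 500ms) for the panel to exit PSR */
+    if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+        amdgpu_dm_psr_wait_disable(stream_state);
+
+    /* 3. only then program the CRTC for CRC generation */
+    dc_stream_configure_crc(...);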
+ +Fixes: 58a261bfc967 ("drm/amd/display: use a more lax vblank enable policy for older ASICs") +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3743 +Reviewed-by: Tom Chung +Signed-off-by: Leo Li +Signed-off-by: Tom Chung +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit aa6713fa2046f4c09bf3013dd1420ae15603ca6f) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 - + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 25 ++++++---- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 35 ++++++++++++-- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h | 3 - + 6 files changed, 54 insertions(+), 17 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -9095,7 +9095,7 @@ static void amdgpu_dm_commit_planes(stru + acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns = + timestamp_ns; + if (acrtc_state->stream->link->psr_settings.psr_allow_active) +- amdgpu_dm_psr_disable(acrtc_state->stream); ++ amdgpu_dm_psr_disable(acrtc_state->stream, true); + mutex_unlock(&dm->dc_lock); + } + } +@@ -9265,7 +9265,7 @@ static void amdgpu_dm_commit_planes(stru + if (acrtc_state->stream->link->replay_settings.replay_allow_active) + amdgpu_dm_replay_disable(acrtc_state->stream); + if (acrtc_state->stream->link->psr_settings.psr_allow_active) +- amdgpu_dm_psr_disable(acrtc_state->stream); ++ amdgpu_dm_psr_disable(acrtc_state->stream, true); + } + mutex_unlock(&dm->dc_lock); + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +@@ -30,6 +30,7 @@ + #include "amdgpu_dm.h" + #include "dc.h" + #include "amdgpu_securedisplay.h" ++#include "amdgpu_dm_psr.h" + + static const char *const pipe_crc_sources[] = { + "none", +@@ -224,6 +225,10 @@ int amdgpu_dm_crtc_configure_crc_source( + + mutex_lock(&adev->dm.dc_lock); + ++ /* For PSR1, check that the panel has exited PSR */ ++ if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) ++ amdgpu_dm_psr_wait_disable(stream_state); ++ + /* Enable or disable CRTC CRC generation */ + if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { + if (!dc_stream_configure_crc(stream_state->ctx->dc, +@@ -357,6 +362,17 @@ int amdgpu_dm_crtc_set_crc_source(struct + + } + ++ /* ++ * Reading the CRC requires the vblank interrupt handler to be ++ * enabled. Keep a reference until CRC capture stops. ++ */ ++ enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); ++ if (!enabled && enable) { ++ ret = drm_crtc_vblank_get(crtc); ++ if (ret) ++ goto cleanup; ++ } ++ + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* Reset secure_display when we change crc source from debugfs */ + amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream); +@@ -367,16 +383,7 @@ int amdgpu_dm_crtc_set_crc_source(struct + goto cleanup; + } + +- /* +- * Reading the CRC requires the vblank interrupt handler to be +- * enabled. Keep a reference until CRC capture stops. 
+- */ +- enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src); + if (!enabled && enable) { +- ret = drm_crtc_vblank_get(crtc); +- if (ret) +- goto cleanup; +- + if (dm_is_crc_source_dprx(source)) { + if (drm_dp_start_crc(aux, crtc)) { + DRM_DEBUG_DRIVER("dp start crc failed\n"); +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +@@ -142,7 +142,7 @@ static void amdgpu_dm_crtc_set_panel_sr_ + amdgpu_dm_replay_enable(vblank_work->stream, true); + } else if (vblank_enabled) { + if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) +- amdgpu_dm_psr_disable(vblank_work->stream); ++ amdgpu_dm_psr_disable(vblank_work->stream, false); + } else if (link->psr_settings.psr_feature_enabled && + allow_sr_entry && !is_sr_active && !is_crc_window_active) { + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +@@ -3638,7 +3638,7 @@ static int crc_win_update_set(void *data + /* PSR may write to OTG CRC window control register, + * so close it before starting secure_display. + */ +- amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream); ++ amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true); + + spin_lock_irq(&adev_to_drm(adev)->event_lock); + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +@@ -201,14 +201,13 @@ void amdgpu_dm_psr_enable(struct dc_stre + * + * Return: true if success + */ +-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream) ++bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait) + { +- unsigned int power_opt = 0; + bool psr_enable = false; + + DRM_DEBUG_DRIVER("Disabling psr...\n"); + +- return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt); ++ return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL); + } + + /* +@@ -251,3 +250,33 @@ bool amdgpu_dm_psr_is_active_allowed(str + + return allow_active; + } ++ ++/** ++ * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR ++ * @stream: stream state attached to the eDP link ++ * ++ * Waits for a max of 500ms for the eDP panel to exit PSR. ++ * ++ * Return: true if panel exited PSR, false otherwise. 
++ */ ++bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream) ++{ ++ enum dc_psr_state psr_state = PSR_STATE0; ++ struct dc_link *link = stream->link; ++ int retry_count; ++ ++ if (link == NULL) ++ return false; ++ ++ for (retry_count = 0; retry_count <= 1000; retry_count++) { ++ dc_link_get_psr_state(link, &psr_state); ++ if (psr_state == PSR_STATE0) ++ break; ++ udelay(500); ++ } ++ ++ if (retry_count == 1000) ++ return false; ++ ++ return true; ++} +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +@@ -34,8 +34,9 @@ + void amdgpu_dm_set_psr_caps(struct dc_link *link); + void amdgpu_dm_psr_enable(struct dc_stream_state *stream); + bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); +-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); ++bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait); + bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); + bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm); ++bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream); + + #endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */ diff --git a/queue-6.12/drm-amd-display-fix-psr-su-not-support-but-still-call-the-amdgpu_dm_psr_enable.patch b/queue-6.12/drm-amd-display-fix-psr-su-not-support-but-still-call-the-amdgpu_dm_psr_enable.patch new file mode 100644 index 0000000000..a77de0ad70 --- /dev/null +++ b/queue-6.12/drm-amd-display-fix-psr-su-not-support-but-still-call-the-amdgpu_dm_psr_enable.patch @@ -0,0 +1,42 @@ +From b0a3e840ad287c33a86b5515d606451b7df86ad4 Mon Sep 17 00:00:00 2001 +From: Tom Chung +Date: Thu, 5 Dec 2024 23:08:28 +0800 +Subject: drm/amd/display: Fix PSR-SU not support but still call the amdgpu_dm_psr_enable + +From: Tom Chung + +commit b0a3e840ad287c33a86b5515d606451b7df86ad4 upstream. + +[Why] +The enum DC_PSR_VERSION_SU_1 of psr_version is 1 and +DC_PSR_VERSION_UNSUPPORTED is 0xFFFFFFFF. + +The original code may has chance trigger the amdgpu_dm_psr_enable() +while psr version is set to DC_PSR_VERSION_UNSUPPORTED. 
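+
+Since DC_PSR_VERSION_UNSUPPORTED is the largest representable value,
+any ">=" comparison matches it as well. Standalone illustration (not
+driver code; the SU_1 and UNSUPPORTED values are the ones quoted
+above, DC_PSR_VERSION_1 == 0 is an assumption):
+
+    enum dc_psr_version {
+        DC_PSR_VERSION_1           = 0,
+        DC_PSR_VERSION_SU_1        = 1,
+        DC_PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
+    };
+
+    /* with ver == DC_PSR_VERSION_UNSUPPORTED: */
+    ver >= DC_PSR_VERSION_SU_1;  /* true  -> PSR wrongly enabled   */
+    ver == DC_PSR_VERSION_SU_1;  /* false -> PSR correctly skipped */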
+ +[How] +Modify the condition to psr->psr_version == DC_PSR_VERSION_SU_1 + +Reviewed-by: Sun peng Li +Signed-off-by: Tom Chung +Signed-off-by: Roman Li +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit f765e7ce0417f8dc38479b4b495047c397c16902) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -8922,7 +8922,7 @@ static void amdgpu_dm_enable_self_refres + (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) { + if (pr->replay_feature_enabled && !pr->replay_allow_active) + amdgpu_dm_replay_enable(acrtc_state->stream, true); +- if (psr->psr_version >= DC_PSR_VERSION_SU_1 && ++ if (psr->psr_version == DC_PSR_VERSION_SU_1 && + !psr->psr_allow_active && !aconn->disallow_edp_enter_psr) + amdgpu_dm_psr_enable(acrtc_state->stream); + } diff --git a/queue-6.12/drm-amd-display-validate-mdoe-under-mst-lct-1-case-as-well.patch b/queue-6.12/drm-amd-display-validate-mdoe-under-mst-lct-1-case-as-well.patch new file mode 100644 index 0000000000..4c372aee87 --- /dev/null +++ b/queue-6.12/drm-amd-display-validate-mdoe-under-mst-lct-1-case-as-well.patch @@ -0,0 +1,57 @@ +From b5cd418f016fb801be413fd52fe4711d2d13018c Mon Sep 17 00:00:00 2001 +From: Wayne Lin +Date: Tue, 10 Dec 2024 11:17:55 +0800 +Subject: drm/amd/display: Validate mdoe under MST LCT=1 case as well + +From: Wayne Lin + +commit b5cd418f016fb801be413fd52fe4711d2d13018c upstream. + +[Why & How] +Currently in dm_dp_mst_is_port_support_mode(), when valdidating mode +under dsc decoding at the last DP link config, we only validate the +case when there is an UFP. However, if the MSTB LCT=1, there is no +UFP. + +Under this case, use root_link_bw_in_kbps as the available bw to +compare. + +Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3720 +Fixes: fa57924c76d9 ("drm/amd/display: Refactor function dm_dp_mst_is_port_support_mode()") +Cc: Mario Limonciello +Cc: Alex Deucher +Reviewed-by: Jerry Zuo +Signed-off-by: Wayne Lin +Signed-off-by: Tom Chung +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit a04d9534a8a75b2806c5321c387be450c364b55e) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 14 +++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +@@ -1831,11 +1831,15 @@ enum dc_status dm_dp_mst_is_port_support + if (immediate_upstream_port) { + virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); + virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); +- if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { +- DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." +- "Max dsc compression can't fit into MST available bw\n"); +- return DC_FAIL_BANDWIDTH_VALIDATE; +- } ++ } else { ++ /* For topology LCT 1 case - only one mstb*/ ++ virtual_channel_bw_in_kbps = root_link_bw_in_kbps; ++ } ++ ++ if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { ++ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." 
++ "Max dsc compression can't fit into MST available bw\n"); ++ return DC_FAIL_BANDWIDTH_VALIDATE; + } + } + diff --git a/queue-6.12/drm-amdgpu-always-sync-the-gfx-pipe-on-ctx-switch.patch b/queue-6.12/drm-amdgpu-always-sync-the-gfx-pipe-on-ctx-switch.patch new file mode 100644 index 0000000000..53e7d1461e --- /dev/null +++ b/queue-6.12/drm-amdgpu-always-sync-the-gfx-pipe-on-ctx-switch.patch @@ -0,0 +1,37 @@ +From af04b320c71c4b59971f021615876808a36e5038 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 20 Dec 2024 16:21:11 +0100 +Subject: drm/amdgpu: always sync the GFX pipe on ctx switch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit af04b320c71c4b59971f021615876808a36e5038 upstream. + +That is needed to enforce isolation between contexts. + +Signed-off-by: Christian König +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +(cherry picked from commit def59436fb0d3ca0f211d14873d0273d69ebb405) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +@@ -193,8 +193,8 @@ int amdgpu_ib_schedule(struct amdgpu_rin + need_ctx_switch = ring->current_ctx != fence_ctx; + if (ring->funcs->emit_pipeline_sync && job && + ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || +- (amdgpu_sriov_vf(adev) && need_ctx_switch) || +- amdgpu_vm_need_pipeline_sync(ring, job))) { ++ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { ++ + need_pipe_sync = true; + + if (tmp) diff --git a/queue-6.12/drm-amdgpu-disable-gfxoff-with-the-compute-workload-on-gfx12.patch b/queue-6.12/drm-amdgpu-disable-gfxoff-with-the-compute-workload-on-gfx12.patch new file mode 100644 index 0000000000..2e187d6cec --- /dev/null +++ b/queue-6.12/drm-amdgpu-disable-gfxoff-with-the-compute-workload-on-gfx12.patch @@ -0,0 +1,41 @@ +From 90505894c4ed581318836b792c57723df491cb91 Mon Sep 17 00:00:00 2001 +From: Kenneth Feng +Date: Thu, 9 Jan 2025 15:58:23 +0800 +Subject: drm/amdgpu: disable gfxoff with the compute workload on gfx12 + +From: Kenneth Feng + +commit 90505894c4ed581318836b792c57723df491cb91 upstream. + +Disable gfxoff with the compute workload on gfx12. This is a +workaround for the opencl test failure. + +Signed-off-by: Kenneth Feng +Acked-by: Alex Deucher +Signed-off-by: Alex Deucher +(cherry picked from commit 2affe2bbc997b3920045c2c434e480c81a5f9707) +Cc: stable@vger.kernel.org # 6.12.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index 3afcd1e8aa54..c4e733c2e75e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -715,8 +715,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, + void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) + { + enum amd_powergating_state state = idle ? 
AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; +- if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && +- ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { ++ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && ++ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) || ++ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) { + pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); + amdgpu_gfx_off_ctrl(adev, idle); + } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && +-- +2.48.1 + diff --git a/queue-6.12/drm-amdgpu-fix-fw-attestation-for-mp0_14_0_-2-3.patch b/queue-6.12/drm-amdgpu-fix-fw-attestation-for-mp0_14_0_-2-3.patch new file mode 100644 index 0000000000..b905be8b69 --- /dev/null +++ b/queue-6.12/drm-amdgpu-fix-fw-attestation-for-mp0_14_0_-2-3.patch @@ -0,0 +1,48 @@ +From bd275e6cfc972329d39c6406a3c6d2ba2aba7db6 Mon Sep 17 00:00:00 2001 +From: Gui Chengming +Date: Tue, 7 Jan 2025 17:09:08 +0800 +Subject: drm/amdgpu: fix fw attestation for MP0_14_0_{2/3} +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Gui Chengming + +commit bd275e6cfc972329d39c6406a3c6d2ba2aba7db6 upstream. + +FW attestation was disabled on MP0_14_0_{2/3}. + +V2: +Move check into is_fw_attestation_support func. (Frank) +Remove DRM_WARN log info. (Alex) +Fix format. (Christian) + +Signed-off-by: Gui Chengming +Reviewed-by: Frank.Min +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +(cherry picked from commit 62952a38d9bcf357d5ffc97615c48b12c9cd627c) +Cc: stable@vger.kernel.org # 6.12.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +index 2d4b67175b55..328a1b963548 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +@@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) + if (adev->flags & AMD_IS_APU) + return 0; + ++ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) || ++ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3)) ++ return 0; ++ + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + return 1; + +-- +2.48.1 + diff --git a/queue-6.12/drm-amdgpu-smu13-update-powersave-optimizations.patch b/queue-6.12/drm-amdgpu-smu13-update-powersave-optimizations.patch new file mode 100644 index 0000000000..0d6d9a7e82 --- /dev/null +++ b/queue-6.12/drm-amdgpu-smu13-update-powersave-optimizations.patch @@ -0,0 +1,42 @@ +From 11510e67d0bd956878ab4ffa03c45766788092c1 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Wed, 8 Jan 2025 15:17:12 -0500 +Subject: drm/amdgpu/smu13: update powersave optimizations + +From: Alex Deucher + +commit 11510e67d0bd956878ab4ffa03c45766788092c1 upstream. + +Only apply when compute profile is selected. This is +the only supported configuration. Selecting other +profiles can lead to performane degradations. 
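+
+In code terms, the optimization is now additionally gated on the
+compute bit of the workload mask. Sketch of the new shape of the
+condition (fw_version_ok is a placeholder for the SMU 13.0.0/13.0.10
+firmware-version checks spelled out in the hunk below):
+
+    if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) &&
+        fw_version_ok) {
+        /* reuse the POWERSAVING workload type */
+    }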
+ +Reviewed-by: Kenneth Feng +Signed-off-by: Alex Deucher +(cherry picked from commit d477e39532d725b1cdb3c8005c689c74ffbf3b94) +Cc: stable@vger.kernel.org # 6.12.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +@@ -2549,11 +2549,12 @@ static int smu_v13_0_0_set_power_profile + &backend_workload_mask); + + /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ +- if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && +- ((smu->adev->pm.fw_version == 0x004e6601) || +- (smu->adev->pm.fw_version >= 0x004e7300))) || +- (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && +- smu->adev->pm.fw_version >= 0x00504500)) { ++ if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) && ++ ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && ++ ((smu->adev->pm.fw_version == 0x004e6601) || ++ (smu->adev->pm.fw_version >= 0x004e7300))) || ++ (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && ++ smu->adev->pm.fw_version >= 0x00504500))) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_POWERSAVING); diff --git a/queue-6.12/drm-i915-fb-relax-clear-color-alignment-to-64-bytes.patch b/queue-6.12/drm-i915-fb-relax-clear-color-alignment-to-64-bytes.patch new file mode 100644 index 0000000000..2d0b7f0cdf --- /dev/null +++ b/queue-6.12/drm-i915-fb-relax-clear-color-alignment-to-64-bytes.patch @@ -0,0 +1,62 @@ +From 1a5401ec3018c101c456cdbda2eaef9482db6786 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Fri, 29 Nov 2024 08:50:11 +0200 +Subject: drm/i915/fb: Relax clear color alignment to 64 bytes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit 1a5401ec3018c101c456cdbda2eaef9482db6786 upstream. + +Mesa changed its clear color alignment from 4k to 64 bytes +without informing the kernel side about the change. This +is now likely to cause framebuffer creation to fail. + +The only thing we do with the clear color buffer in i915 is: +1. map a single page +2. read out bytes 16-23 from said page +3. unmap the page + +So the only requirement we really have is that those 8 bytes +are all contained within one page. Thus we can deal with the +Mesa regression by reducing the alignment requiment from 4k +to the same 64 bytes in the kernel. We could even go as low as +32 bytes, but IIRC 64 bytes is the hardware requirement on +the 3D engine side so matching that seems sensible. + +Note that the Mesa alignment chages were partially undone +so the regression itself was already fixed on userspace +side. 
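+
+A quick sanity check of the "one page" argument: with a 64-byte
+aligned offset, the largest in-page offset is 4096 - 64 = 4032, and
+the last byte read is offset + 23, i.e. at most 4055 < 4096, so bytes
+16-23 can never straddle a page boundary. The fix is then the single
+relaxed alignment test (see the hunk below):
+
+    if (IS_ALIGNED(fb->base.offsets[i], 64))  /* was PAGE_SIZE */
+        continue;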
+ +Cc: stable@vger.kernel.org +Cc: Sagar Ghuge +Cc: Nanley Chery +Reported-by: Xi Ruoyao +Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13057 +Closes: https://lore.kernel.org/all/45a5bba8de009347262d86a4acb27169d9ae0d9f.camel@xry111.site/ +Link: https://gitlab.freedesktop.org/mesa/mesa/-/commit/17f97a69c13832a6c1b0b3aad45b06f07d4b852f +Link: https://gitlab.freedesktop.org/mesa/mesa/-/commit/888f63cf1baf34bc95e847a30a041dc7798edddb +Signed-off-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20241129065014.8363-2-ville.syrjala@linux.intel.com +Tested-by: Xi Ruoyao +Reviewed-by: José Roberto de Souza +(cherry picked from commit ed3a892e5e3d6b3f6eeb76db7c92a968aeb52f3d) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_fb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/display/intel_fb.c ++++ b/drivers/gpu/drm/i915/display/intel_fb.c +@@ -1613,7 +1613,7 @@ int intel_fill_fb_info(struct drm_i915_p + * arithmetic related to alignment and offset calculation. + */ + if (is_gen12_ccs_cc_plane(&fb->base, i)) { +- if (IS_ALIGNED(fb->base.offsets[i], PAGE_SIZE)) ++ if (IS_ALIGNED(fb->base.offsets[i], 64)) + continue; + else + return -EINVAL; diff --git a/queue-6.12/drm-xe-mark-computecs-read-mode-as-uc-on-igpu.patch b/queue-6.12/drm-xe-mark-computecs-read-mode-as-uc-on-igpu.patch new file mode 100644 index 0000000000..cf8bc9b069 --- /dev/null +++ b/queue-6.12/drm-xe-mark-computecs-read-mode-as-uc-on-igpu.patch @@ -0,0 +1,48 @@ +From b1231ff7ea0689d04040a44864c265bc11612fa8 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 13 Jan 2025 16:25:07 -0800 +Subject: drm/xe: Mark ComputeCS read mode as UC on iGPU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Brost + +commit b1231ff7ea0689d04040a44864c265bc11612fa8 upstream. + +RING_CMD_CCTL read index should be UC on iGPU parts due to L3 caching +structure. Having this as WB blocks ULLS from being enabled. Change to +UC to unblock ULLS on iGPU. + +v2: + - Drop internal communications commnet, bspec is updated + +Cc: Balasubramani Vivekanandan +Cc: Michal Mrozek +Cc: Paulo Zanoni +Cc: José Roberto de Souza +Cc: stable@vger.kernel.org +Fixes: 328e089bfb37 ("drm/xe: Leverage ComputeCS read L3 caching") +Signed-off-by: Matthew Brost +Acked-by: Michal Mrozek +Reviewed-by: Stuart Summers +Reviewed-by: Matt Roper +Link: https://patchwork.freedesktop.org/patch/msgid/20250114002507.114087-1-matthew.brost@intel.com +(cherry picked from commit 758debf35b9cda5450e40996991a6e4b222899bd) +Signed-off-by: Thomas Hellström +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/xe/xe_hw_engine.c ++++ b/drivers/gpu/drm/xe/xe_hw_engine.c +@@ -417,7 +417,7 @@ hw_engine_setup_default_state(struct xe_ + * Bspec: 72161 + */ + const u8 mocs_write_idx = gt->mocs.uc_index; +- const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && ++ const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) && + (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? 
+ gt->mocs.wb_index : gt->mocs.uc_index; + u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | diff --git a/queue-6.12/drm-xe-oa-add-missing-visactl-mux-registers.patch b/queue-6.12/drm-xe-oa-add-missing-visactl-mux-registers.patch new file mode 100644 index 0000000000..295c6f6da1 --- /dev/null +++ b/queue-6.12/drm-xe-oa-add-missing-visactl-mux-registers.patch @@ -0,0 +1,37 @@ +From 79a21fc921d7aafaf69d00b4938435b81bf66022 Mon Sep 17 00:00:00 2001 +From: Ashutosh Dixit +Date: Fri, 10 Jan 2025 18:15:39 -0800 +Subject: drm/xe/oa: Add missing VISACTL mux registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ashutosh Dixit + +commit 79a21fc921d7aafaf69d00b4938435b81bf66022 upstream. + +Add missing VISACTL mux registers required for some OA +config's (e.g. RenderPipeCtrl). + +Fixes: cdf02fe1a94a ("drm/xe/oa/uapi: Add/remove OA config perf ops") +Cc: stable@vger.kernel.org +Signed-off-by: Ashutosh Dixit +Reviewed-by: Umesh Nerlige Ramappa +Link: https://patchwork.freedesktop.org/patch/msgid/20250111021539.2920346-1-ashutosh.dixit@intel.com +(cherry picked from commit c26f22dac3449d8a687237cdfc59a6445eb8f75a) +Signed-off-by: Thomas Hellström +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_oa.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/xe/xe_oa.c ++++ b/drivers/gpu/drm/xe/xe_oa.c +@@ -1980,6 +1980,7 @@ static const struct xe_mmio_range xe2_oa + { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ ++ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ diff --git a/queue-6.12/hrtimers-handle-cpu-state-correctly-on-hotplug.patch b/queue-6.12/hrtimers-handle-cpu-state-correctly-on-hotplug.patch new file mode 100644 index 0000000000..7d800ff69c --- /dev/null +++ b/queue-6.12/hrtimers-handle-cpu-state-correctly-on-hotplug.patch @@ -0,0 +1,101 @@ +From 2f8dea1692eef2b7ba6a256246ed82c365fdc686 Mon Sep 17 00:00:00 2001 +From: Koichiro Den +Date: Fri, 20 Dec 2024 22:44:21 +0900 +Subject: hrtimers: Handle CPU state correctly on hotplug + +From: Koichiro Den + +commit 2f8dea1692eef2b7ba6a256246ed82c365fdc686 upstream. + +Consider a scenario where a CPU transitions from CPUHP_ONLINE to halfway +through a CPU hotunplug down to CPUHP_HRTIMERS_PREPARE, and then back to +CPUHP_ONLINE: + +Since hrtimers_prepare_cpu() does not run, cpu_base.hres_active remains set +to 1 throughout. However, during a CPU unplug operation, the tick and the +clockevents are shut down at CPUHP_AP_TICK_DYING. On return to the online +state, for instance CFS incorrectly assumes that the hrtick is already +active, and the chance of the clockevent device to transition to oneshot +mode is also lost forever for the CPU, unless it goes back to a lower state +than CPUHP_HRTIMERS_PREPARE once. + +This round-trip reveals another issue; cpu_base.online is not set to 1 +after the transition, which appears as a WARN_ON_ONCE in enqueue_hrtimer(). + +Aside of that, the bulk of the per CPU state is not reset either, which +means there are dangling pointers in the worst case. 
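+
+For reference, the fix described next pairs the previously
+teardown-only hotplug state with a startup callback, so a CPU that
+aborts an unplug re-runs the online half on its way back up
+(resulting cpuhp table entry, taken from the diff below):
+
+    [CPUHP_AP_HRTIMERS_DYING] = {
+        .name            = "hrtimers:dying",
+        .startup.single  = hrtimers_cpu_starting,
+        .teardown.single = hrtimers_cpu_dying,
+    },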
+ +Address this by adding a corresponding startup() callback, which resets the +stale per CPU state and sets the online flag. + +[ tglx: Make the new callback unconditionally available, remove the online + modification in the prepare() callback and clear the remaining + state in the starting callback instead of the prepare callback ] + +Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") +Signed-off-by: Koichiro Den +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20241220134421.3809834-1-koichiro.den@canonical.com +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/hrtimer.h | 1 + + kernel/cpu.c | 2 +- + kernel/time/hrtimer.c | 11 ++++++++++- + 3 files changed, 12 insertions(+), 2 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -379,6 +379,7 @@ extern void __init hrtimers_init(void); + extern void sysrq_timer_list_show(void); + + int hrtimers_prepare_cpu(unsigned int cpu); ++int hrtimers_cpu_starting(unsigned int cpu); + #ifdef CONFIG_HOTPLUG_CPU + int hrtimers_cpu_dying(unsigned int cpu); + #else +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -2179,7 +2179,7 @@ static struct cpuhp_step cpuhp_hp_states + }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", +- .startup.single = NULL, ++ .startup.single = hrtimers_cpu_starting, + .teardown.single = hrtimers_cpu_dying, + }, + [CPUHP_AP_TICK_DYING] = { +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -2156,6 +2156,15 @@ int hrtimers_prepare_cpu(unsigned int cp + } + + cpu_base->cpu = cpu; ++ hrtimer_cpu_base_init_expiry_lock(cpu_base); ++ return 0; ++} ++ ++int hrtimers_cpu_starting(unsigned int cpu) ++{ ++ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ++ ++ /* Clear out any left over state from a CPU down operation */ + cpu_base->active_bases = 0; + cpu_base->hres_active = 0; + cpu_base->hang_detected = 0; +@@ -2164,7 +2173,6 @@ int hrtimers_prepare_cpu(unsigned int cp + cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; +- hrtimer_cpu_base_init_expiry_lock(cpu_base); + return 0; + } + +@@ -2240,6 +2248,7 @@ int hrtimers_cpu_dying(unsigned int dyin + void __init hrtimers_init(void) + { + hrtimers_prepare_cpu(smp_processor_id()); ++ hrtimers_cpu_starting(smp_processor_id()); + open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); + } + diff --git a/queue-6.12/irqchip-gic-v3-handle-cpu_pm_enter_failed-correctly.patch b/queue-6.12/irqchip-gic-v3-handle-cpu_pm_enter_failed-correctly.patch new file mode 100644 index 0000000000..116f953c0d --- /dev/null +++ b/queue-6.12/irqchip-gic-v3-handle-cpu_pm_enter_failed-correctly.patch @@ -0,0 +1,46 @@ +From 0d62a49ab55c99e8deb4593b8d9f923de1ab5c18 Mon Sep 17 00:00:00 2001 +From: Yogesh Lal +Date: Fri, 20 Dec 2024 15:09:07 +0530 +Subject: irqchip/gic-v3: Handle CPU_PM_ENTER_FAILED correctly + +From: Yogesh Lal + +commit 0d62a49ab55c99e8deb4593b8d9f923de1ab5c18 upstream. + +When a CPU attempts to enter low power mode, it disables the redistributor +and Group 1 interrupts and reinitializes the system registers upon wakeup. + +If the transition into low power mode fails, then the CPU_PM framework +invokes the PM notifier callback with CPU_PM_ENTER_FAILED to allow the +drivers to undo the state changes. + +The GIC V3 driver ignores CPU_PM_ENTER_FAILED, which leaves the GIC in +disabled state. + +Handle CPU_PM_ENTER_FAILED in the same way as CPU_PM_EXIT to restore normal +operation. 
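+
+The mishandled event sequence, and the resulting check (condition
+excerpted from the one-line fix below):
+
+    CPU_PM_ENTER         /* redistributor and Group 1 irqs disabled */
+       -> low power entry fails, so no CPU_PM_EXIT will follow
+    CPU_PM_ENTER_FAILED  /* must restore state, exactly like EXIT */
+
+    if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) {
+        if (gic_dist_security_disabled())
+            gic_enable_redist(true);
+        gic_cpu_sys_reg_enable();
+    }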
+ +[ tglx: Massage change log, add Fixes tag ] + +Fixes: 3708d52fc6bb ("irqchip: gic-v3: Implement CPU PM notifier") +Signed-off-by: Yogesh Lal +Signed-off-by: Thomas Gleixner +Acked-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20241220093907.2747601-1-quic_ylal@quicinc.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-gic-v3.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-gic-v3.c ++++ b/drivers/irqchip/irq-gic-v3.c +@@ -1522,7 +1522,7 @@ static int gic_retrigger(struct irq_data + static int gic_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) + { +- if (cmd == CPU_PM_EXIT) { ++ if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) { + if (gic_dist_security_disabled()) + gic_enable_redist(true); + gic_cpu_sys_reg_enable(); diff --git a/queue-6.12/irqchip-gic-v3-its-don-t-enable-interrupts-in-its_irq_set_vcpu_affinity.patch b/queue-6.12/irqchip-gic-v3-its-don-t-enable-interrupts-in-its_irq_set_vcpu_affinity.patch new file mode 100644 index 0000000000..7d327524d0 --- /dev/null +++ b/queue-6.12/irqchip-gic-v3-its-don-t-enable-interrupts-in-its_irq_set_vcpu_affinity.patch @@ -0,0 +1,48 @@ +From 35cb2c6ce7da545f3b5cb1e6473ad7c3a6f08310 Mon Sep 17 00:00:00 2001 +From: Tomas Krcka +Date: Mon, 30 Dec 2024 15:08:25 +0000 +Subject: irqchip/gic-v3-its: Don't enable interrupts in its_irq_set_vcpu_affinity() + +From: Tomas Krcka + +commit 35cb2c6ce7da545f3b5cb1e6473ad7c3a6f08310 upstream. + +The following call-chain leads to enabling interrupts in a nested interrupt +disabled section: + +irq_set_vcpu_affinity() + irq_get_desc_lock() + raw_spin_lock_irqsave() <--- Disable interrupts + its_irq_set_vcpu_affinity() + guard(raw_spinlock_irq) <--- Enables interrupts when leaving the guard() + irq_put_desc_unlock() <--- Warns because interrupts are enabled + +This was broken in commit b97e8a2f7130, which replaced the original +raw_spin_[un]lock() pair with guard(raw_spinlock_irq). + +Fix the issue by using guard(raw_spinlock). + +[ tglx: Massaged change log ] + +Fixes: b97e8a2f7130 ("irqchip/gic-v3-its: Fix potential race condition in its_vlpi_prop_update()") +Signed-off-by: Tomas Krcka +Signed-off-by: Thomas Gleixner +Reviewed-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20241230150825.62894-1-krckatom@amazon.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-gic-v3-its.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -1961,7 +1961,7 @@ static int its_irq_set_vcpu_affinity(str + if (!is_v4(its_dev->its)) + return -EINVAL; + +- guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock); ++ guard(raw_spinlock)(&its_dev->event_map.vlpi_lock); + + /* Unmap request? */ + if (!info) diff --git a/queue-6.12/irqchip-plug-a-of-node-reference-leak-in-platform_irqchip_probe.patch b/queue-6.12/irqchip-plug-a-of-node-reference-leak-in-platform_irqchip_probe.patch new file mode 100644 index 0000000000..f5dd81e998 --- /dev/null +++ b/queue-6.12/irqchip-plug-a-of-node-reference-leak-in-platform_irqchip_probe.patch @@ -0,0 +1,48 @@ +From 9322d1915f9d976ee48c09d800fbd5169bc2ddcc Mon Sep 17 00:00:00 2001 +From: Joe Hattori +Date: Sun, 15 Dec 2024 12:39:45 +0900 +Subject: irqchip: Plug a OF node reference leak in platform_irqchip_probe() + +From: Joe Hattori + +commit 9322d1915f9d976ee48c09d800fbd5169bc2ddcc upstream. 
+ +platform_irqchip_probe() leaks a OF node when irq_init_cb() fails. Fix it +by declaring par_np with the __free(device_node) cleanup construct. + +This bug was found by an experimental static analysis tool that I am +developing. + +Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros") +Signed-off-by: Joe Hattori +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20241215033945.3414223-1-joe@pf.is.s.u-tokyo.ac.jp +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irqchip.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/irqchip/irqchip.c ++++ b/drivers/irqchip/irqchip.c +@@ -35,11 +35,10 @@ void __init irqchip_init(void) + int platform_irqchip_probe(struct platform_device *pdev) + { + struct device_node *np = pdev->dev.of_node; +- struct device_node *par_np = of_irq_find_parent(np); ++ struct device_node *par_np __free(device_node) = of_irq_find_parent(np); + of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev); + + if (!irq_init_cb) { +- of_node_put(par_np); + return -EINVAL; + } + +@@ -55,7 +54,6 @@ int platform_irqchip_probe(struct platfo + * interrupt controller can check for specific domains as necessary. + */ + if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) { +- of_node_put(par_np); + return -EPROBE_DEFER; + } + diff --git a/queue-6.12/mm-clear-uffd-wp-pte-pmd-state-on-mremap.patch b/queue-6.12/mm-clear-uffd-wp-pte-pmd-state-on-mremap.patch new file mode 100644 index 0000000000..ec16d33b03 --- /dev/null +++ b/queue-6.12/mm-clear-uffd-wp-pte-pmd-state-on-mremap.patch @@ -0,0 +1,199 @@ +From 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Tue, 7 Jan 2025 14:47:52 +0000 +Subject: mm: clear uffd-wp PTE/PMD state on mremap() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ryan Roberts + +commit 0cef0bb836e3cfe00f08f9606c72abd72fe78ca3 upstream. + +When mremap()ing a memory region previously registered with userfaultfd as +write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in +flag clearing leads to a mismatch between the vma flags (which have +uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp +cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to +trigger a warning in page_table_check_pte_flags() due to setting the pte +to writable while uffd-wp is still set. + +Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any +such mremap() so that the values are consistent with the existing clearing +of VM_UFFD_WP. Be careful to clear the logical flag regardless of its +physical form; a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE, +huge PMD and hugetlb paths. + +Link: https://lkml.kernel.org/r/20250107144755.1871363-2-ryan.roberts@arm.com +Co-developed-by: Mikołaj Lenczewski +Signed-off-by: Mikołaj Lenczewski +Signed-off-by: Ryan Roberts +Closes: https://lore.kernel.org/linux-mm/810b44a8-d2ae-4107-b665-5a42eae2d948@arm.com/ +Fixes: 63b2d4174c4a ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl") +Cc: David Hildenbrand +Cc: Jann Horn +Cc: Liam R. 
Howlett +Cc: Lorenzo Stoakes +Cc: Mark Rutland +Cc: Muchun Song +Cc: Peter Xu +Cc: Shuah Khan +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/userfaultfd_k.h | 12 ++++++++++++ + mm/huge_memory.c | 12 ++++++++++++ + mm/hugetlb.c | 14 +++++++++++++- + mm/mremap.c | 32 +++++++++++++++++++++++++++++++- + 4 files changed, 68 insertions(+), 2 deletions(-) + +--- a/include/linux/userfaultfd_k.h ++++ b/include/linux/userfaultfd_k.h +@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(str + vma_is_shmem(vma); + } + ++static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) ++{ ++ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; ++ ++ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0; ++} ++ + extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); + extern void dup_userfaultfd_complete(struct list_head *); + void dup_userfaultfd_fail(struct list_head *); +@@ -401,6 +408,11 @@ static inline bool userfaultfd_wp_async( + { + return false; + } ++ ++static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) ++{ ++ return false; ++} + + #endif /* CONFIG_USERFAULTFD */ + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2132,6 +2132,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t p + return pmd; + } + ++static pmd_t clear_uffd_wp_pmd(pmd_t pmd) ++{ ++ if (pmd_present(pmd)) ++ pmd = pmd_clear_uffd_wp(pmd); ++ else if (is_swap_pmd(pmd)) ++ pmd = pmd_swp_clear_uffd_wp(pmd); ++ ++ return pmd; ++} ++ + bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, + unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) + { +@@ -2170,6 +2180,8 @@ bool move_huge_pmd(struct vm_area_struct + pgtable_trans_huge_deposit(mm, new_pmd, pgtable); + } + pmd = move_soft_dirty_pmd(pmd); ++ if (vma_has_uffd_without_event_remap(vma)) ++ pmd = clear_uffd_wp_pmd(pmd); + set_pmd_at(mm, new_addr, new_pmd, pmd); + if (force_flush) + flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE); +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5395,6 +5395,7 @@ static void move_huge_pte(struct vm_area + unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte, + unsigned long sz) + { ++ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma); + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; + spinlock_t *src_ptl, *dst_ptl; +@@ -5411,7 +5412,18 @@ static void move_huge_pte(struct vm_area + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + + pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); +- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); ++ ++ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) ++ huge_pte_clear(mm, new_addr, dst_pte, sz); ++ else { ++ if (need_clear_uffd_wp) { ++ if (pte_present(pte)) ++ pte = huge_pte_clear_uffd_wp(pte); ++ else if (is_swap_pte(pte)) ++ pte = pte_swp_clear_uffd_wp(pte); ++ } ++ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); ++ } + + if (src_ptl != dst_ptl) + spin_unlock(src_ptl); +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_stru + struct vm_area_struct *new_vma, pmd_t *new_pmd, + unsigned long new_addr, bool need_rmap_locks) + { ++ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma); + struct mm_struct *mm = vma->vm_mm; + pte_t *old_pte, *new_pte, pte; + spinlock_t *old_ptl, *new_ptl; +@@ -207,7 +208,18 @@ static int move_ptes(struct vm_area_stru + force_flush = true; + pte = move_pte(pte, old_addr, new_addr); + pte = 
move_soft_dirty_pte(pte); +- set_pte_at(mm, new_addr, new_pte, pte); ++ ++ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) ++ pte_clear(mm, new_addr, new_pte); ++ else { ++ if (need_clear_uffd_wp) { ++ if (pte_present(pte)) ++ pte = pte_clear_uffd_wp(pte); ++ else if (is_swap_pte(pte)) ++ pte = pte_swp_clear_uffd_wp(pte); ++ } ++ set_pte_at(mm, new_addr, new_pte, pte); ++ } + } + + arch_leave_lazy_mmu_mode(); +@@ -269,6 +281,15 @@ static bool move_normal_pmd(struct vm_ar + if (WARN_ON_ONCE(!pmd_none(*new_pmd))) + return false; + ++ /* If this pmd belongs to a uffd vma with remap events disabled, we need ++ * to ensure that the uffd-wp state is cleared from all pgtables. This ++ * means recursing into lower page tables in move_page_tables(), and we ++ * can reuse the existing code if we simply treat the entry as "not ++ * moved". ++ */ ++ if (vma_has_uffd_without_event_remap(vma)) ++ return false; ++ + /* + * We don't have to worry about the ordering of src and dst + * ptlocks because exclusive mmap_lock prevents deadlock. +@@ -324,6 +345,15 @@ static bool move_normal_pud(struct vm_ar + if (WARN_ON_ONCE(!pud_none(*new_pud))) + return false; + ++ /* If this pud belongs to a uffd vma with remap events disabled, we need ++ * to ensure that the uffd-wp state is cleared from all pgtables. This ++ * means recursing into lower page tables in move_page_tables(), and we ++ * can reuse the existing code if we simply treat the entry as "not ++ * moved". ++ */ ++ if (vma_has_uffd_without_event_remap(vma)) ++ return false; ++ + /* + * We don't have to worry about the ordering of src and dst + * ptlocks because exclusive mmap_lock prevents deadlock. diff --git a/queue-6.12/mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch b/queue-6.12/mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch new file mode 100644 index 0000000000..4c767586f5 --- /dev/null +++ b/queue-6.12/mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch @@ -0,0 +1,36 @@ +From 76d5d4c53e68719c018691b19a961e78524a155c Mon Sep 17 00:00:00 2001 +From: Guo Weikang +Date: Fri, 27 Dec 2024 17:23:10 +0800 +Subject: mm/kmemleak: fix percpu memory leak detection failure + +From: Guo Weikang + +commit 76d5d4c53e68719c018691b19a961e78524a155c upstream. + +kmemleak_alloc_percpu gives an incorrect min_count parameter, causing +percpu memory to be considered a gray object. 
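+
+For context, kmemleak's min_count parameter selects how an object is
+treated (summarized from Documentation/dev-tools/kmemleak.rst):
+
+    kmemleak_alloc(ptr, size, min_count, gfp);
+      /* min_count  < 0: ignore the object entirely          */
+      /* min_count == 0: gray - scanned, but never reported  */
+      /* min_count >= 1: report as a leak if fewer than      */
+      /*                 min_count references are found      */
+
+Passing 0 therefore silently opted every percpu allocation out of
+leak reporting; 1 restores normal tracking.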
+ +Link: https://lkml.kernel.org/r/20241227092311.3572500-1-guoweikang.kernel@gmail.com +Fixes: 8c8685928910 ("mm/kmemleak: use IS_ERR_PCPU() for pointer in the percpu address space") +Signed-off-by: Guo Weikang +Acked-by: Uros Bizjak +Acked-by: Catalin Marinas +Cc: Guo Weikang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kmemleak.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/kmemleak.c ++++ b/mm/kmemleak.c +@@ -1071,7 +1071,7 @@ void __ref kmemleak_alloc_percpu(const v + pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size); + + if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) +- create_object_percpu((__force unsigned long)ptr, size, 0, gfp); ++ create_object_percpu((__force unsigned long)ptr, size, 1, gfp); + } + EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu); + diff --git a/queue-6.12/mm-vmscan-pgdemote-vmstat-is-not-getting-updated-when-mglru-is-enabled.patch b/queue-6.12/mm-vmscan-pgdemote-vmstat-is-not-getting-updated-when-mglru-is-enabled.patch new file mode 100644 index 0000000000..f8c3c0bcba --- /dev/null +++ b/queue-6.12/mm-vmscan-pgdemote-vmstat-is-not-getting-updated-when-mglru-is-enabled.patch @@ -0,0 +1,69 @@ +From bd3d56ffa2c450364acf02663ba88996da37079d Mon Sep 17 00:00:00 2001 +From: Donet Tom +Date: Thu, 9 Jan 2025 00:05:39 -0600 +Subject: mm: vmscan : pgdemote vmstat is not getting updated when MGLRU is enabled. + +From: Donet Tom + +commit bd3d56ffa2c450364acf02663ba88996da37079d upstream. + +When MGLRU is enabled, the pgdemote_kswapd, pgdemote_direct, and +pgdemote_khugepaged stats in vmstat are not being updated. + +Commit f77f0c751478 ("mm,memcg: provide per-cgroup counters for NUMA +balancing operations") moved the pgdemote vmstat update from +demote_folio_list() to shrink_inactive_list(), which is in the normal LRU +path. As a result, the pgdemote stats are updated correctly for the +normal LRU but not for MGLRU. + +To address this, we have added the pgdemote stat update in the +evict_folios() function, which is in the MGLRU path. With this patch, the +pgdemote stats will now be updated correctly when MGLRU is enabled. 
+ +Without this patch vmstat output when MGLRU is enabled +====================================================== +pgdemote_kswapd 0 +pgdemote_direct 0 +pgdemote_khugepaged 0 + +With this patch vmstat output when MGLRU is enabled +=================================================== +pgdemote_kswapd 43234 +pgdemote_direct 4691 +pgdemote_khugepaged 0 + +Link: https://lkml.kernel.org/r/20250109060540.451261-1-donettom@linux.ibm.com +Fixes: f77f0c751478 ("mm,memcg: provide per-cgroup counters for NUMA balancing operations") +Signed-off-by: Donet Tom +Acked-by: Yu Zhao +Tested-by: Li Zhijian +Reviewed-by: Li Zhijian +Cc: Aneesh Kumar K.V (Arm) +Cc: David Rientjes +Cc: Johannes Weiner +Cc: Kaiyang Zhao +Cc: Michal Hocko +Cc: Muchun Song +Cc: Ritesh Harjani (IBM) +Cc: Roman Gushchin +Cc: Shakeel Butt +Cc: Wei Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4637,6 +4637,9 @@ retry: + reset_batch_size(walk); + } + ++ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(), ++ stat.nr_demoted); ++ + item = PGSTEAL_KSWAPD + reclaimer_offset(); + if (!cgroup_reclaim(sc)) + __count_vm_events(item, reclaimed); diff --git a/queue-6.12/pmdomain-imx8mp-blk-ctrl-add-missing-loop-break-condition.patch b/queue-6.12/pmdomain-imx8mp-blk-ctrl-add-missing-loop-break-condition.patch new file mode 100644 index 0000000000..306b1af9cd --- /dev/null +++ b/queue-6.12/pmdomain-imx8mp-blk-ctrl-add-missing-loop-break-condition.patch @@ -0,0 +1,66 @@ +From 726efa92e02b460811e8bc6990dd742f03b645ea Mon Sep 17 00:00:00 2001 +From: Xiaolei Wang +Date: Wed, 15 Jan 2025 09:41:18 +0800 +Subject: pmdomain: imx8mp-blk-ctrl: add missing loop break condition + +From: Xiaolei Wang + +commit 726efa92e02b460811e8bc6990dd742f03b645ea upstream. + +Currently imx8mp_blk_ctrl_remove() will continue the for loop +until an out-of-bounds exception occurs. 
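+
+The condition never tests the counter, so the loop only terminates by
+faulting (before/after, matching the one-line fix below):
+
+    for (i = 0; bc->onecell_data.num_domains; i++)      /* never false */
+    for (i = 0; i < bc->onecell_data.num_domains; i++)  /* bounded */
+
+On shutdown this walks off the end of bc->domains[]: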
+
+pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : dev_pm_domain_detach+0x8/0x48
+lr : imx8mp_blk_ctrl_shutdown+0x58/0x90
+sp : ffffffc084f8bbf0
+x29: ffffffc084f8bbf0 x28: ffffff80daf32ac0 x27: 0000000000000000
+x26: ffffffc081658d78 x25: 0000000000000001 x24: ffffffc08201b028
+x23: ffffff80d0db9490 x22: ffffffc082340a78 x21: 00000000000005b0
+x20: ffffff80d19bc180 x19: 000000000000000a x18: ffffffffffffffff
+x17: ffffffc080a39e08 x16: ffffffc080a39c98 x15: 4f435f464f006c72
+x14: 0000000000000004 x13: ffffff80d0172110 x12: 0000000000000000
+x11: ffffff80d0537740 x10: ffffff80d05376c0 x9 : ffffffc0808ed2d8
+x8 : ffffffc084f8bab0 x7 : 0000000000000000 x6 : 0000000000000000
+x5 : ffffff80d19b9420 x4 : fffffffe03466e60 x3 : 0000000080800077
+x2 : 0000000000000000 x1 : 0000000000000001 x0 : 0000000000000000
+Call trace:
+ dev_pm_domain_detach+0x8/0x48
+ platform_shutdown+0x2c/0x48
+ device_shutdown+0x158/0x268
+ kernel_restart_prepare+0x40/0x58
+ kernel_kexec+0x58/0xe8
+ __do_sys_reboot+0x198/0x258
+ __arm64_sys_reboot+0x2c/0x40
+ invoke_syscall+0x5c/0x138
+ el0_svc_common.constprop.0+0x48/0xf0
+ do_el0_svc+0x24/0x38
+ el0_svc+0x38/0xc8
+ el0t_64_sync_handler+0x120/0x130
+ el0t_64_sync+0x190/0x198
+Code: 8128c2d0 ffffffc0 aa1e03e9 d503201f
+
+Fixes: 556f5cf9568a ("soc: imx: add i.MX8MP HSIO blk-ctrl")
+Cc: stable@vger.kernel.org
+Signed-off-by: Xiaolei Wang
+Reviewed-by: Lucas Stach
+Reviewed-by: Fabio Estevam
+Reviewed-by: Frank Li
+Link: https://lore.kernel.org/r/20250115014118.4086729-1-xiaolei.wang@windriver.com
+Signed-off-by: Ulf Hansson
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/pmdomain/imx/imx8mp-blk-ctrl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pmdomain/imx/imx8mp-blk-ctrl.c
++++ b/drivers/pmdomain/imx/imx8mp-blk-ctrl.c
+@@ -770,7 +770,7 @@ static void imx8mp_blk_ctrl_remove(struc
+
+ of_genpd_del_provider(pdev->dev.of_node);
+
+- for (i = 0; bc->onecell_data.num_domains; i++) {
++ for (i = 0; i < bc->onecell_data.num_domains; i++) {
+ struct imx8mp_blk_ctrl_domain *domain = &bc->domains[i];
+
+ pm_genpd_remove(&domain->genpd);
diff --git a/queue-6.12/revert-drm-amd-display-enable-urgent-latency-adjustments-for-dcn35.patch b/queue-6.12/revert-drm-amd-display-enable-urgent-latency-adjustments-for-dcn35.patch
new file mode 100644
index 0000000000..60e819a5c0
--- /dev/null
+++ b/queue-6.12/revert-drm-amd-display-enable-urgent-latency-adjustments-for-dcn35.patch
@@ -0,0 +1,46 @@
+From 3412860cc4c0c484f53f91b371483e6e4440c3e5 Mon Sep 17 00:00:00 2001
+From: Nicholas Susanto
+Date: Thu, 19 Dec 2024 14:15:37 -0500
+Subject: Revert "drm/amd/display: Enable urgent latency adjustments for DCN35"
+
+From: Nicholas Susanto
+
+commit 3412860cc4c0c484f53f91b371483e6e4440c3e5 upstream.
+
+Revert commit 284f141f5ce5 ("drm/amd/display: Enable urgent latency adjustments for DCN35")
+
+[Why & How]
+
+The urgent latency increase caused a 2.8K OLED monitor to be blocked
+from supporting P0.
+
+Reverting this change does not reintroduce the Netflix corruption issue
+that it originally fixed.
+ +Fixes: 284f141f5ce5 ("drm/amd/display: Enable urgent latency adjustments for DCN35") +Reviewed-by: Charlene Liu +Signed-off-by: Nicholas Susanto +Signed-off-by: Tom Chung +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +(cherry picked from commit c7ccfc0d4241a834c25a9a9e1e78b388b4445d23) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +@@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3 + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, +- .do_urgent_latency_adjustment = 1, ++ .do_urgent_latency_adjustment = 0, + .urgent_latency_adjustment_fabric_clock_component_us = 0, +- .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, ++ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + }; + + void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/queue-6.12/selftests-mm-set-allocated-memory-to-non-zero-content-in-cow-test.patch b/queue-6.12/selftests-mm-set-allocated-memory-to-non-zero-content-in-cow-test.patch new file mode 100644 index 0000000000..b94e32cc6e --- /dev/null +++ b/queue-6.12/selftests-mm-set-allocated-memory-to-non-zero-content-in-cow-test.patch @@ -0,0 +1,82 @@ +From a32bf5bb7933fde6f39747499f8ec232b5b5400f Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Tue, 7 Jan 2025 14:25:53 +0000 +Subject: selftests/mm: set allocated memory to non-zero content in cow test + +From: Ryan Roberts + +commit a32bf5bb7933fde6f39747499f8ec232b5b5400f upstream. + +After commit b1f202060afe ("mm: remap unused subpages to shared zeropage +when splitting isolated thp"), cow test cases involving swapping out THPs +via madvise(MADV_PAGEOUT) started to be skipped due to the subsequent +check via pagemap determining that the memory was not actually swapped +out. Logs similar to this were emitted: + + ... + + # [RUN] Basic COW after fork() ... with swapped-out, PTE-mapped THP (16 kB) + ok 2 # SKIP MADV_PAGEOUT did not work, is swap enabled? + # [RUN] Basic COW after fork() ... with single PTE of swapped-out THP (16 kB) + ok 3 # SKIP MADV_PAGEOUT did not work, is swap enabled? + # [RUN] Basic COW after fork() ... with swapped-out, PTE-mapped THP (32 kB) + ok 4 # SKIP MADV_PAGEOUT did not work, is swap enabled? + + ... + +The commit in question introduces the behaviour of scanning THPs and if +their content is predominantly zero, it splits them and replaces the pages +which are wholly zero with the zero page. These cow test cases were +getting caught up in this. + +So let's avoid that by filling the contents of all allocated memory with +a non-zero value. With this in place, the tests are passing again. + +Link: https://lkml.kernel.org/r/20250107142555.1870101-1-ryan.roberts@arm.com +Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp") +Signed-off-by: Ryan Roberts +Acked-by: David Hildenbrand +Cc: Usama Arif +Cc: Yu Zhao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/cow.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/mm/cow.c ++++ b/tools/testing/selftests/mm/cow.c +@@ -758,7 +758,7 @@ static void do_run_with_base_page(test_f + } + + /* Populate a base page. 
*/ +- memset(mem, 0, pagesize); ++ memset(mem, 1, pagesize); + + if (swapout) { + madvise(mem, pagesize, MADV_PAGEOUT); +@@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, + * Try to populate a THP. Touch the first sub-page and test if + * we get the last sub-page populated automatically. + */ +- mem[0] = 0; ++ mem[0] = 1; + if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { + ksft_test_result_skip("Did not get a THP populated\n"); + goto munmap; + } +- memset(mem, 0, thpsize); ++ memset(mem, 1, thpsize); + + size = thpsize; + switch (thp_run) { +@@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, + } + + /* Populate an huge page. */ +- memset(mem, 0, hugetlbsize); ++ memset(mem, 1, hugetlbsize); + + /* + * We need a total of two hugetlb pages to handle COW/unsharing diff --git a/queue-6.12/series b/queue-6.12/series index 6d3e566157..7fd7341558 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -93,3 +93,29 @@ filemap-avoid-truncating-64-bit-offset-to-32-bits.patch fs-proc-fix-softlockup-in-__read_vmcore-part-2.patch gpio-xilinx-convert-gpio_lock-to-raw-spinlock.patch tools-fix-atomic_set-definition-to-set-the-value-correctly.patch +pmdomain-imx8mp-blk-ctrl-add-missing-loop-break-condition.patch +mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch +selftests-mm-set-allocated-memory-to-non-zero-content-in-cow-test.patch +drm-amd-display-do-not-elevate-mem_type-change-to-full-update.patch +mm-clear-uffd-wp-pte-pmd-state-on-mremap.patch +mm-vmscan-pgdemote-vmstat-is-not-getting-updated-when-mglru-is-enabled.patch +tracing-gfp-fix-the-gfp-enum-values-shown-for-user-space-tracing-tools.patch +irqchip-plug-a-of-node-reference-leak-in-platform_irqchip_probe.patch +irqchip-gic-v3-handle-cpu_pm_enter_failed-correctly.patch +irqchip-gic-v3-its-don-t-enable-interrupts-in-its_irq_set_vcpu_affinity.patch +hrtimers-handle-cpu-state-correctly-on-hotplug.patch +timers-migration-fix-another-race-between-hotplug-and-idle-entry-exit.patch +timers-migration-enforce-group-initialization-visibility-to-tree-walkers.patch +x86-fred-fix-the-fred-rsp0-msr-out-of-sync-with-its-per-cpu-cache.patch +drm-i915-fb-relax-clear-color-alignment-to-64-bytes.patch +drm-xe-mark-computecs-read-mode-as-uc-on-igpu.patch +drm-xe-oa-add-missing-visactl-mux-registers.patch +drm-amdgpu-smu13-update-powersave-optimizations.patch +drm-amdgpu-fix-fw-attestation-for-mp0_14_0_-2-3.patch +drm-amdgpu-disable-gfxoff-with-the-compute-workload-on-gfx12.patch +drm-amdgpu-always-sync-the-gfx-pipe-on-ctx-switch.patch +drm-amd-display-fix-psr-su-not-support-but-still-call-the-amdgpu_dm_psr_enable.patch +drm-amd-display-disable-replay-and-psr-while-vrr-is-enabled.patch +drm-amd-display-do-not-wait-for-psr-disable-on-vbl-enable.patch +revert-drm-amd-display-enable-urgent-latency-adjustments-for-dcn35.patch +drm-amd-display-validate-mdoe-under-mst-lct-1-case-as-well.patch diff --git a/queue-6.12/timers-migration-enforce-group-initialization-visibility-to-tree-walkers.patch b/queue-6.12/timers-migration-enforce-group-initialization-visibility-to-tree-walkers.patch new file mode 100644 index 0000000000..647ec1d602 --- /dev/null +++ b/queue-6.12/timers-migration-enforce-group-initialization-visibility-to-tree-walkers.patch @@ -0,0 +1,220 @@ +From de3ced72a79280fefd680e5e101d8b9f03cfa1d7 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Wed, 15 Jan 2025 00:15:05 +0100 +Subject: timers/migration: Enforce group initialization visibility to tree walkers + +From: Frederic Weisbecker + +commit 
de3ced72a79280fefd680e5e101d8b9f03cfa1d7 upstream.
+
+Commit 2522c84db513 ("timers/migration: Fix another race between hotplug
+and idle entry/exit") fixed yet another race between idle exit and CPU
+hotplug up leading to a wrong "0" value migrator assigned to the top
+level. However there is yet another situation that remains unhandled:
+
+ [GRP0:0]
+ migrator = TMIGR_NONE
+ active = NONE
+ groupmask = 1
+ / \ \
+ 0 1 2..7
+ idle idle idle
+
+0) The system is fully idle.
+
+ [GRP0:0]
+ migrator = CPU 0
+ active = CPU 0
+ groupmask = 1
+ / \ \
+ 0 1 2..7
+ active idle idle
+
+1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
+but it hasn't yet returned to __walk_groups().
+
+ [GRP0:0]
+ migrator = CPU 0
+ active = CPU 0, CPU 1
+ groupmask = 1
+ / \ \
+ 0 1 2..7
+ active active idle
+
+2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
+__walk_groups(), delayed by #VMEXIT for example).
+
+ [GRP1:0]
+ migrator = TMIGR_NONE
+ active = NONE
+ groupmask = 1
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = NONE
+ groupmask = 1 groupmask = 2
+ / \ \
+ 0 1 2..7 8
+ active active idle !online
+
+3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
+which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
+and GRP0:0. CPU 1 hasn't yet propagated its activation up to GRP1:0.
+
+ [GRP1:0]
+ migrator = GRP0:0
+ active = GRP0:0
+ groupmask = 1
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = NONE
+ groupmask = 1 groupmask = 2
+ / \ \
+ 0 1 2..7 8
+ active active idle !online
+
+4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
+returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
+fetched and the pre-initialized groupmask of GRP0:0 is also visible.
+As a result tmigr_active_up() is called to GRP1:0 with GRP0:0 as active
+and migrator. CPU 0 is returning to __walk_groups() but again suffers
+a #VMEXIT.
+
+ [GRP1:0]
+ migrator = GRP0:0
+ active = GRP0:0
+ groupmask = 1
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = NONE
+ groupmask = 1 groupmask = 2
+ / \ \
+ 0 1 2..7 8
+ active active idle !online
+
+5) CPU 1 propagates its activation of GRP0:0 to GRP1:0. This has no
+ effect since CPU 0 did it already.
+
+ [GRP1:0]
+ migrator = GRP0:0
+ active = GRP0:0, GRP0:1
+ groupmask = 1
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = CPU 8
+ active = CPU 0, CPU1 active = CPU 8
+ groupmask = 1 groupmask = 2
+ / \ \ \
+ 0 1 2..7 8
+ active active idle active
+
+6) CPU 1 links CPU 8 to its group. CPU 8 boots and goes through
+ CPUHP_AP_TMIGR_ONLINE which propagates activation.
+
+ [GRP2:0]
+ migrator = TMIGR_NONE
+ active = NONE
+ groupmask = 1
+ / \
+ [GRP1:0] [GRP1:1]
+ migrator = GRP0:0 migrator = TMIGR_NONE
+ active = GRP0:0, GRP0:1 active = NONE
+ groupmask = 1 groupmask = 2
+ / \
+ [GRP0:0] [GRP0:1] [GRP0:2]
+ migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = CPU 8 active = NONE
+ groupmask = 1 groupmask = 2 groupmask = 0
+ / \ \ \
+ 0 1 2..7 8 64
+ active active idle active !online
+
+7) CPU 64 is booting. CPUHP_TMIGR_PREPARE is being run by CPU 1
+which has created the GRP1:1, GRP0:2 and the new top GRP2:0 connected to
+GRP1:1 and GRP1:0. CPU 1 hasn't yet propagated its activation up to
+GRP2:0.
+
+ [GRP2:0]
+ migrator = 0 (!!!)
+ active = NONE
+ groupmask = 1
+ / \
+ [GRP1:0] [GRP1:1]
+ migrator = GRP0:0 migrator = TMIGR_NONE
+ active = GRP0:0, GRP0:1 active = NONE
+ groupmask = 1 groupmask = 2
+ / \
+ [GRP0:0] [GRP0:1] [GRP0:2]
+ migrator = CPU 0 migrator = CPU 8 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = CPU 8 active = NONE
+ groupmask = 1 groupmask = 2 groupmask = 0
+ / \ \ \
+ 0 1 2..7 8 64
+ active active idle active !online
+
+8) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
+returning from tmigr_cpu_active(). The new top GRP2:0 is visible and
+fetched but the pre-initialized groupmask of GRP1:0 is not because no
+ordering made its initialization visible. As a result tmigr_active_up()
+may be called to GRP2:0 with a "0" child's groupmask, leaving the timers
+ignored forever when the system is fully idle.
+
+The race is highly theoretical and perhaps impossible in practice but
+the groupmask of the child is not the only concern here as the whole
+initialization of the child is not guaranteed to be visible to any
+tree walker racing against hotplug (idle entry/exit, remote handling,
+etc...). Although the current code layout seems to be resilient to such
+hazards, this doesn't say much about the future.
+
+Fix this by enforcing an address dependency between group initialization
+and the write/read of the group's parent pointer. Fortunately that
+doesn't involve any barrier addition in the fast paths.
+
+Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
+Signed-off-by: Frederic Weisbecker
+Signed-off-by: Thomas Gleixner
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20250114231507.21672-3-frederic@kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/time/timer_migration.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
+index c8a8ea2e5b98..371a62a749aa 100644
+--- a/kernel/time/timer_migration.c
++++ b/kernel/time/timer_migration.c
+@@ -534,8 +534,13 @@ static void __walk_groups(up_f up, struct tmigr_walk *data,
+ break;
+
+ child = group;
+- group = group->parent;
++ /*
++ * Pairs with the store release on group connection
++ * to make sure group initialization is visible.
++ */
++ group = READ_ONCE(group->parent);
+ data->childmask = child->groupmask;
++ WARN_ON_ONCE(!data->childmask);
+ } while (group);
+ }
+
+@@ -1578,7 +1583,12 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
+ child->groupmask = BIT(parent->num_children++);
+ }
+
+- child->parent = parent;
++ /*
++ * Make sure parent initialization is visible before publishing it to a
++ * racing CPU entering/exiting idle. This RELEASE barrier enforces an
++ * address dependency that pairs with the READ_ONCE() in __walk_groups().
++ */
++ smp_store_release(&child->parent, parent);
+
+ raw_spin_unlock(&parent->lock);
+ raw_spin_unlock_irq(&child->lock);
+--
+2.48.1
+
diff --git a/queue-6.12/timers-migration-fix-another-race-between-hotplug-and-idle-entry-exit.patch b/queue-6.12/timers-migration-fix-another-race-between-hotplug-and-idle-entry-exit.patch
new file mode 100644
index 0000000000..b0b7994a1b
--- /dev/null
+++ b/queue-6.12/timers-migration-fix-another-race-between-hotplug-and-idle-entry-exit.patch
@@ -0,0 +1,170 @@
+From b729cc1ec21a5899b7879ccfbe1786664928d597 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker
+Date: Wed, 15 Jan 2025 00:15:04 +0100
+Subject: timers/migration: Fix another race between hotplug and idle entry/exit
+
+From: Frederic Weisbecker
+
+commit b729cc1ec21a5899b7879ccfbe1786664928d597 upstream.
+
+Commit 10a0e6f3d3db ("timers/migration: Move hierarchy setup into
+cpuhotplug prepare callback") fixed a race between idle exit and CPU
+hotplug up leading to a wrong "0" value migrator assigned to the top
+level. However there is still a situation that remains unhandled:
+
+ [GRP0:0]
+ migrator = TMIGR_NONE
+ active = NONE
+ groupmask = 0
+ / \ \
+ 0 1 2..7
+ idle idle idle
+
+0) The system is fully idle.
+
+ [GRP0:0]
+ migrator = CPU 0
+ active = CPU 0
+ groupmask = 0
+ / \ \
+ 0 1 2..7
+ active idle idle
+
+1) CPU 0 is activating. It has done the cmpxchg on the top's ->migr_state
+but it hasn't yet returned to __walk_groups().
+
+ [GRP0:0]
+ migrator = CPU 0
+ active = CPU 0, CPU 1
+ groupmask = 0
+ / \ \
+ 0 1 2..7
+ active active idle
+
+2) CPU 1 is activating. CPU 0 stays the migrator (still stuck in
+__walk_groups(), delayed by #VMEXIT for example).
+
+ [GRP1:0]
+ migrator = TMIGR_NONE
+ active = NONE
+ groupmask = 0
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = NONE
+ groupmask = 2 groupmask = 1
+ / \ \
+ 0 1 2..7 8
+ active active idle !online
+
+3) CPU 8 is preparing to boot. CPUHP_TMIGR_PREPARE is being run by CPU 1
+which has created the GRP0:1 and the new top GRP1:0 connected to GRP0:1
+and GRP0:0. The groupmask of GRP0:0 is now 2. CPU 1 hasn't yet
+propagated its activation up to GRP1:0.
+
+ [GRP1:0]
+ migrator = 0 (!!!)
+ active = NONE
+ groupmask = 0
+ / \
+ [GRP0:0] [GRP0:1]
+ migrator = CPU 0 migrator = TMIGR_NONE
+ active = CPU 0, CPU1 active = NONE
+ groupmask = 2 groupmask = 1
+ / \ \
+ 0 1 2..7 8
+ active active idle !online
+
+4) CPU 0 finally resumed after its #VMEXIT. It's in __walk_groups()
+returning from tmigr_cpu_active(). The new top GRP1:0 is visible and
+fetched but the freshly updated groupmask of GRP0:0 may not be visible
+due to lack of ordering! As a result tmigr_active_up() is called to
+GRP0:0 with a child's groupmask of "0". This buggy "0" groupmask then
+becomes the migrator for GRP1:0 forever. As a result, timers on a fully
+idle system get ignored.
+
+One possible fix would be to define TMIGR_NONE as "0" so that such a
+race would have no effect. And after all TMIGR_NONE doesn't need to be
+anything else. However this would leave an uncomfortable state machine
+whose gears only avoid breaking by chance, and which would remain
+vulnerable to future modifications.
+
+Instead, keep TMIGR_NONE as is and pre-initialize the groupmask of any
+newly created top level to "1". This groupmask is guaranteed to be
+visible upon fetching the corresponding group for the 1st time:
+
+_ By the upcoming CPU thanks to CPU hotplug synchronization between the
+ control CPU (BP) and the booting one (AP).
+
+_ By the control CPU since the groupmask and parent pointers are
+ initialized locally.
+
+_ By all CPUs belonging to the same group as the control CPU, because
+ they must wait for it to become idle before ever needing to walk to
+ the new top. The cmpxchg() on ->migr_state then makes sure its
+ groupmask is visible.
+
+With this pre-initialization, it is guaranteed that if a future top level
+is linked to an old one, it is walked through with a valid groupmask.
+
+Fixes: 10a0e6f3d3db ("timers/migration: Move hierarchy setup into cpuhotplug prepare callback")
+Signed-off-by: Frederic Weisbecker
+Signed-off-by: Thomas Gleixner
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20250114231507.21672-2-frederic@kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/time/timer_migration.c | 29 ++++++++++++++++++++++++++++-
+ 1 file changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
+index 8d57f7686bb0..c8a8ea2e5b98 100644
+--- a/kernel/time/timer_migration.c
++++ b/kernel/time/timer_migration.c
+@@ -1487,6 +1487,21 @@ static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
+ s.seq = 0;
+ atomic_set(&group->migr_state, s.state);
+
++ /*
++ * If this is a new top-level, prepare its groupmask in advance.
++ * This avoids accidents where yet another new top-level is
++ * created in the future and made visible before the current groupmask.
++ */
++ if (list_empty(&tmigr_level_list[lvl])) {
++ group->groupmask = BIT(0);
++ /*
++ * The previous top level has prepared its groupmask already,
++ * simply account it as the first child.
++ */
++ if (lvl > 0)
++ group->num_children = 1;
++ }
++
+ timerqueue_init_head(&group->events);
+ timerqueue_init(&group->groupevt.nextevt);
+ group->groupevt.nextevt.expires = KTIME_MAX;
+@@ -1550,8 +1565,20 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
+ raw_spin_lock_irq(&child->lock);
+ raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+
++ if (activate) {
++ /*
++ * @child is the old top and @parent the new one. In this
++ * case groupmask is pre-initialized and @child already
++ * accounted, along with its new sibling corresponding to the
++ * CPU going up.
++ */
++ WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2);
++ } else {
++ /* Adding @child for the CPU going up to @parent. */
++ child->groupmask = BIT(parent->num_children++);
++ }
++
+ child->parent = parent;
+- child->groupmask = BIT(parent->num_children++);
+
+ raw_spin_unlock(&parent->lock);
+ raw_spin_unlock_irq(&child->lock);
+--
+2.48.1
+
diff --git a/queue-6.12/tracing-gfp-fix-the-gfp-enum-values-shown-for-user-space-tracing-tools.patch b/queue-6.12/tracing-gfp-fix-the-gfp-enum-values-shown-for-user-space-tracing-tools.patch
new file mode 100644
index 0000000000..18ecbbdc75
--- /dev/null
+++ b/queue-6.12/tracing-gfp-fix-the-gfp-enum-values-shown-for-user-space-tracing-tools.patch
@@ -0,0 +1,152 @@
+From 60295b944ff6805e677c48ae4178532b207d43be Mon Sep 17 00:00:00 2001
+From: Steven Rostedt
+Date: Thu, 16 Jan 2025 16:41:24 -0500
+Subject: tracing: gfp: Fix the GFP enum values shown for user space tracing tools
+
+From: Steven Rostedt
+
+commit 60295b944ff6805e677c48ae4178532b207d43be upstream.
+
+Tracing tools like perf and trace-cmd read the /sys/kernel/tracing/events/*/*/format
+files to know how to parse the data and also how to print it. 
For the
+"print fmt" portion of that file, if anything uses an enum that is not
+exported to the tracing system, user space will not be able to parse it.
+
+The GFP flags used to be defines, and defines get translated in the print
+fmt sections. But now they have been converted to enums, which are not.
+
+The mm_page_alloc trace event format used to have:
+
+ print fmt: "page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
+ REC->pfn != -1UL ? (((struct page *)vmemmap_base) + (REC->pfn)) : ((void
+ *)0), REC->pfn != -1UL ? REC->pfn : 0, REC->order, REC->migratetype,
+ (REC->gfp_flags) ? __print_flags(REC->gfp_flags, "|", {( unsigned
+ long)(((((((( gfp_t)(0x400u|0x800u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) |
+ (( gfp_t)0x100000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u) | (( gfp_t)0)) |
+ (( gfp_t)0x40000u) | (( gfp_t)0x80000u) | (( gfp_t)0x2000u)) & ~((
+ gfp_t)(0x400u|0x800u))) | (( gfp_t)0x400u)), "GFP_TRANSHUGE"}, {( unsigned
+ long)((((((( gfp_t)(0x400u|0x800u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) |
+ (( gfp_t)0x100000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u) | (( gfp_t)0)) ...
+
+Where the GFP values are shown and not their names. But after the GFP
+flags were converted to use enums, it has:
+
+ print fmt: "page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
+ REC->pfn != -1UL ? (vmemmap + (REC->pfn)) : ((void *)0), REC->pfn != -1UL
+ ? REC->pfn : 0, REC->order, REC->migratetype, (REC->gfp_flags) ?
+ __print_flags(REC->gfp_flags, "|", {( unsigned long)((((((((
+ gfp_t)(((((1UL))) << (___GFP_DIRECT_RECLAIM_BIT))|((((1UL))) <<
+ (___GFP_KSWAPD_RECLAIM_BIT)))) | (( gfp_t)((((1UL))) << (___GFP_IO_BIT)))
+ | (( gfp_t)((((1UL))) << (___GFP_FS_BIT))) | (( gfp_t)((((1UL))) <<
+ (___GFP_HARDWALL_BIT)))) | (( gfp_t)((((1UL))) << (___GFP_HIGHMEM_BIT))))
+ | (( gfp_t)((((1UL))) << (___GFP_MOVABLE_BIT))) | (( gfp_t)0)) | ((
+ gfp_t)((((1UL))) << (___GFP_COMP_BIT))) ...
+
+Where the enum names like ___GFP_KSWAPD_RECLAIM_BIT are shown and not
+their values. User space has no way to convert these names to their
+values and the output will fail to parse. What is shown is now:
+
+ mm_page_alloc: page=0xffffffff981685f3 pfn=0x1d1ac1 order=0 migratetype=1 gfp_flags=0x140cca
+
+The TRACE_DEFINE_ENUM() macro was created to handle enums in the print fmt
+files. This causes them to be replaced at boot up with their numeric
+values, so that user space tooling can parse it. By using this macro, the
+output is back to being human readable:
+
+ mm_page_alloc: page=0xffffffff981685f3 pfn=0x122233 order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu
+Cc: Mark Rutland
+Cc: Mathieu Desnoyers
+Cc: Andrew Morton
+Cc: Veronika Molnarova
+Cc: Suren Baghdasaryan
+Cc: Linus Torvalds
+Link: https://lore.kernel.org/20250116214438.749504792@goodmis.org
+Reported-by: Michael Petlan
+Closes: https://lore.kernel.org/all/87be5f7c-1a0-dad-daa0-54e342efaea7@redhat.com/
+Fixes: 772dd0342727c ("mm: enumerate all gfp flags")
+Signed-off-by: Steven Rostedt (Google)
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/trace/events/mmflags.h | 63 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 63 insertions(+)
+
+diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
+index bb8a59c6caa2..d36c857dd249 100644
+--- a/include/trace/events/mmflags.h
++++ b/include/trace/events/mmflags.h
+@@ -13,6 +13,69 @@
+ * Thus most bits set go first.
+ */
+
++/* These define the values that are enums (the bits) */
++#define TRACE_GFP_FLAGS_GENERAL \
++ TRACE_GFP_EM(DMA) \
++ TRACE_GFP_EM(HIGHMEM) \
++ TRACE_GFP_EM(DMA32) \
++ TRACE_GFP_EM(MOVABLE) \
++ TRACE_GFP_EM(RECLAIMABLE) \
++ TRACE_GFP_EM(HIGH) \
++ TRACE_GFP_EM(IO) \
++ TRACE_GFP_EM(FS) \
++ TRACE_GFP_EM(ZERO) \
++ TRACE_GFP_EM(DIRECT_RECLAIM) \
++ TRACE_GFP_EM(KSWAPD_RECLAIM) \
++ TRACE_GFP_EM(WRITE) \
++ TRACE_GFP_EM(NOWARN) \
++ TRACE_GFP_EM(RETRY_MAYFAIL) \
++ TRACE_GFP_EM(NOFAIL) \
++ TRACE_GFP_EM(NORETRY) \
++ TRACE_GFP_EM(MEMALLOC) \
++ TRACE_GFP_EM(COMP) \
++ TRACE_GFP_EM(NOMEMALLOC) \
++ TRACE_GFP_EM(HARDWALL) \
++ TRACE_GFP_EM(THISNODE) \
++ TRACE_GFP_EM(ACCOUNT) \
++ TRACE_GFP_EM(ZEROTAGS)
++
++#ifdef CONFIG_KASAN_HW_TAGS
++# define TRACE_GFP_FLAGS_KASAN \
++ TRACE_GFP_EM(SKIP_ZERO) \
++ TRACE_GFP_EM(SKIP_KASAN)
++#else
++# define TRACE_GFP_FLAGS_KASAN
++#endif
++
++#ifdef CONFIG_LOCKDEP
++# define TRACE_GFP_FLAGS_LOCKDEP \
++ TRACE_GFP_EM(NOLOCKDEP)
++#else
++# define TRACE_GFP_FLAGS_LOCKDEP
++#endif
++
++#ifdef CONFIG_SLAB_OBJ_EXT
++# define TRACE_GFP_FLAGS_SLAB \
++ TRACE_GFP_EM(NO_OBJ_EXT)
++#else
++# define TRACE_GFP_FLAGS_SLAB
++#endif
++
++#define TRACE_GFP_FLAGS \
++ TRACE_GFP_FLAGS_GENERAL \
++ TRACE_GFP_FLAGS_KASAN \
++ TRACE_GFP_FLAGS_LOCKDEP \
++ TRACE_GFP_FLAGS_SLAB
++
++#undef TRACE_GFP_EM
++#define TRACE_GFP_EM(a) TRACE_DEFINE_ENUM(___GFP_##a##_BIT);
++
++TRACE_GFP_FLAGS
++
++/* Just in case these are ever used */
++TRACE_DEFINE_ENUM(___GFP_UNUSED_BIT);
++TRACE_DEFINE_ENUM(___GFP_LAST_BIT);
++
+ #define gfpflag_string(flag) {(__force unsigned long)flag, #flag}
+
+ #define __def_gfpflag_names \
+--
+2.48.1
+
diff --git a/queue-6.12/x86-fred-fix-the-fred-rsp0-msr-out-of-sync-with-its-per-cpu-cache.patch b/queue-6.12/x86-fred-fix-the-fred-rsp0-msr-out-of-sync-with-its-per-cpu-cache.patch
new file mode 100644
index 0000000000..e56e9a19cb
--- /dev/null
+++ b/queue-6.12/x86-fred-fix-the-fred-rsp0-msr-out-of-sync-with-its-per-cpu-cache.patch
@@ -0,0 +1,65 @@
+From de31b3cd706347044e1a57d68c3a683d58e8cca4 Mon Sep 17 00:00:00 2001
+From: "Xin Li (Intel)"
+Date: Fri, 10 Jan 2025 09:46:39 -0800
+Subject: x86/fred: Fix the FRED RSP0 MSR out of sync with its per-CPU cache
+
+From: Xin Li (Intel)
+
+commit de31b3cd706347044e1a57d68c3a683d58e8cca4 upstream.
+
+The FRED RSP0 MSR is only used for delivering events when running
+userspace. Linux leverages this property to reduce expensive MSR
+writes and optimize context switches. The kernel only writes the
+MSR when about to run userspace *and* when the MSR has actually
+changed since the last time userspace ran.
+
+This optimization is implemented by maintaining a per-CPU cache of
+FRED RSP0 and then checking that against the value for the top of
+current task stack before running userspace.
+
+However, cpu_init_fred_exceptions() writes the MSR without updating
+the per-CPU cache. This means that the kernel might return to
+userspace with MSR_IA32_FRED_RSP0==0 when it needed to point to the
+top of current task stack. This would induce a double fault (#DF),
+which is bad.
+
+A context switch after cpu_init_fred_exceptions() can paper over
+the issue since it updates the cached value. That evidently
+happens most of the time, which explains how this bug got through.
+
+Fix the bug by resynchronizing the FRED RSP0 MSR with its
+per-CPU cache in cpu_init_fred_exceptions().
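+
+The caching pattern at stake looks roughly like this (an illustrative
+sketch, not the kernel's exact code; fred_rsp0 is the per-CPU cache
+used by the fix below):
+
+        /* Per-CPU cache of the last value written to the FRED RSP0 MSR */
+        DEFINE_PER_CPU(unsigned long, fred_rsp0);
+
+        /* On return to userspace: skip the expensive WRMSR whenever the
+         * cache already matches the top of the current task stack. */
+        if (__this_cpu_read(fred_rsp0) != rsp0) {
+                wrmsrl(MSR_IA32_FRED_RSP0, rsp0);
+                __this_cpu_write(fred_rsp0, rsp0);
+        }
+
+Writing the MSR behind the cache's back, as cpu_init_fred_exceptions()
+did, is exactly what lets the two go out of sync.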
+
+Fixes: fe85ee391966 ("x86/entry: Set FRED RSP0 on return to userspace instead of context switch")
+Signed-off-by: Xin Li (Intel)
+Signed-off-by: Dave Hansen
+Acked-by: Dave Hansen
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20250110174639.1250829-1-xin%40zytor.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/fred.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
+index 8d32c3f48abc..5e2cd1004980 100644
+--- a/arch/x86/kernel/fred.c
++++ b/arch/x86/kernel/fred.c
+@@ -50,7 +50,13 @@ void cpu_init_fred_exceptions(void)
+ FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
+
+ wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
+- wrmsrl(MSR_IA32_FRED_RSP0, 0);
++
++ /*
++ * After a CPU offline/online cycle, the FRED RSP0 MSR should be
++ * resynchronized with its per-CPU cache.
++ */
++ wrmsrl(MSR_IA32_FRED_RSP0, __this_cpu_read(fred_rsp0));
++
+ wrmsrl(MSR_IA32_FRED_RSP1, 0);
+ wrmsrl(MSR_IA32_FRED_RSP2, 0);
+ wrmsrl(MSR_IA32_FRED_RSP3, 0);
+--
+2.48.1
+