--- /dev/null
+From stable+bounces-204371-greg=kroah.com@vger.kernel.org Wed Dec 31 17:19:08 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Dec 2025 11:18:56 -0500
+Subject: drm/displayid: add quirk to ignore DisplayID checksum errors
+To: stable@vger.kernel.org
+Cc: "Jani Nikula" <jani.nikula@intel.com>, "Tiago Martins Araújo" <tiago.martins.araujo@gmail.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251231161856.3237284-3-sashal@kernel.org>
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+[ Upstream commit 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4 ]
+
+Add a mechanism for DisplayID specific quirks, and add the first quirk
+to ignore DisplayID section checksum errors.
+
+It would be quite inconvenient to pass existing EDID quirks from
+drm_edid.c for DisplayID parsing. Not all places doing DisplayID
+iteration have the quirks readily available, so the quirks would have
+to be passed around everywhere. Simply add a separate array of
+DisplayID specific EDID
+quirks. We do end up checking it every time we iterate DisplayID blocks,
+but hopefully the number of quirks remains small.
+
+There are a few laptop models with DisplayID checksum failures, leading
+to higher refresh rates only present in the DisplayID blocks being
+ignored. Add a quirk for the panel used in those machines.
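+
+For illustration only (not part of the change itself): a new quirk would
+be added as another entry in the table introduced below; the second
+entry here uses a made-up panel ID and name.
+
+	static const struct displayid_quirk quirks[] = {
+		{
+			.ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
+			.quirks = BIT(QUIRK_IGNORE_CHECKSUM),
+		},
+		{
+			/* hypothetical example entry, placeholder values */
+			.ident = DRM_EDID_IDENT_INIT('A', 'B', 'C', 1234, "EXAMPLE-PANEL"),
+			.quirks = BIT(QUIRK_IGNORE_CHECKSUM),
+		},
+	};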
+
+Reported-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Closes: https://lore.kernel.org/r/CACRbrPGvLP5LANXuFi6z0S7XMbAG4X5y2YOLBDxfOVtfGGqiKQ@mail.gmail.com
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14703
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Tested-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/c04d81ae648c5f21b3f5b7953f924718051f2798.1761681968.git.jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_displayid.c | 41 +++++++++++++++++++++++++++----
+ drivers/gpu/drm/drm_displayid_internal.h | 2 +
+ 2 files changed, 39 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/drm_displayid.c
++++ b/drivers/gpu/drm/drm_displayid.c
+@@ -9,6 +9,34 @@
+ #include "drm_crtc_internal.h"
+ #include "drm_displayid_internal.h"
+
++enum {
++ QUIRK_IGNORE_CHECKSUM,
++};
++
++struct displayid_quirk {
++ const struct drm_edid_ident ident;
++ u8 quirks;
++};
++
++static const struct displayid_quirk quirks[] = {
++ {
++ .ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
++ .quirks = BIT(QUIRK_IGNORE_CHECKSUM),
++ },
++};
++
++static u8 get_quirks(const struct drm_edid *drm_edid)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(quirks); i++) {
++ if (drm_edid_match(drm_edid, &quirks[i].ident))
++ return quirks[i].quirks;
++ }
++
++ return 0;
++}
++
+ static const struct displayid_header *
+ displayid_get_header(const u8 *displayid, int length, int index)
+ {
+@@ -23,7 +51,7 @@ displayid_get_header(const u8 *displayid
+ }
+
+ static const struct displayid_header *
+-validate_displayid(const u8 *displayid, int length, int idx)
++validate_displayid(const u8 *displayid, int length, int idx, bool ignore_checksum)
+ {
+ int i, dispid_length;
+ u8 csum = 0;
+@@ -41,8 +69,11 @@ validate_displayid(const u8 *displayid,
+ for (i = 0; i < dispid_length; i++)
+ csum += displayid[idx + i];
+ if (csum) {
+- DRM_NOTE("DisplayID checksum invalid, remainder is %d\n", csum);
+- return ERR_PTR(-EINVAL);
++ DRM_NOTE("DisplayID checksum invalid, remainder is %d%s\n", csum,
++ ignore_checksum ? " (ignoring)" : "");
++
++ if (!ignore_checksum)
++ return ERR_PTR(-EINVAL);
+ }
+
+ return base;
+@@ -52,6 +83,7 @@ static const u8 *find_next_displayid_ext
+ {
+ const struct displayid_header *base;
+ const u8 *displayid;
++ bool ignore_checksum = iter->quirks & BIT(QUIRK_IGNORE_CHECKSUM);
+
+ displayid = drm_edid_find_extension(iter->drm_edid, DISPLAYID_EXT, &iter->ext_index);
+ if (!displayid)
+@@ -61,7 +93,7 @@ static const u8 *find_next_displayid_ext
+ iter->length = EDID_LENGTH - 1;
+ iter->idx = 1;
+
+- base = validate_displayid(displayid, iter->length, iter->idx);
++ base = validate_displayid(displayid, iter->length, iter->idx, ignore_checksum);
+ if (IS_ERR(base))
+ return NULL;
+
+@@ -76,6 +108,7 @@ void displayid_iter_edid_begin(const str
+ memset(iter, 0, sizeof(*iter));
+
+ iter->drm_edid = drm_edid;
++ iter->quirks = get_quirks(drm_edid);
+ }
+
+ static const struct displayid_block *
+--- a/drivers/gpu/drm/drm_displayid_internal.h
++++ b/drivers/gpu/drm/drm_displayid_internal.h
+@@ -167,6 +167,8 @@ struct displayid_iter {
+
+ u8 version;
+ u8 primary_use;
++
++ u8 quirks;
+ };
+
+ void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
--- /dev/null
+From stable+bounces-204370-greg=kroah.com@vger.kernel.org Wed Dec 31 17:19:04 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Dec 2025 11:18:55 -0500
+Subject: drm/edid: add DRM_EDID_IDENT_INIT() to initialize struct drm_edid_ident
+To: stable@vger.kernel.org
+Cc: "Jani Nikula" <jani.nikula@intel.com>, "Tiago Martins Araújo" <tiago.martins.araujo@gmail.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251231161856.3237284-2-sashal@kernel.org>
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+[ Upstream commit 8b61583f993589a64c061aa91b44f5bd350d90a5 ]
+
+Add a convenience helper for initializing struct drm_edid_ident.
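+
+A minimal usage sketch, mirroring how the DisplayID quirk patch later in
+this series initializes its table entry:
+
+	static const struct drm_edid_ident ident =
+		DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5");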
+
+Cc: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Tested-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/710b2ac6a211606ec1f90afa57b79e8c7375a27e.1761681968.git.jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Stable-dep-of: 83cbb4d33dc2 ("drm/displayid: add quirk to ignore DisplayID checksum errors")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/drm/drm_edid.h | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/include/drm/drm_edid.h
++++ b/include/drm/drm_edid.h
+@@ -340,6 +340,12 @@ struct drm_edid_ident {
+ const char *name;
+ };
+
++#define DRM_EDID_IDENT_INIT(_vend_chr_0, _vend_chr_1, _vend_chr_2, _product_id, _name) \
++{ \
++ .panel_id = drm_edid_encode_panel_id(_vend_chr_0, _vend_chr_1, _vend_chr_2, _product_id), \
++ .name = _name, \
++}
++
+ #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
+
+ /* Short Audio Descriptor */
--- /dev/null
+From ba1b40ed0e34bab597fd90d4c4e9f7397f878c8f Mon Sep 17 00:00:00 2001
+From: Danilo Krummrich <dakr@kernel.org>
+Date: Tue, 28 Oct 2025 12:00:52 +0100
+Subject: drm: nova: depend on CONFIG_64BIT
+
+From: Danilo Krummrich <dakr@kernel.org>
+
+commit ba1b40ed0e34bab597fd90d4c4e9f7397f878c8f upstream.
+
+nova-core already depends on CONFIG_64BIT, hence also depend on
+CONFIG_64BIT for nova-drm.
+
+Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Link: https://patch.msgid.link/20251028110058.340320-1-dakr@kernel.org
+Signed-off-by: Danilo Krummrich <dakr@kernel.org>
+Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nova/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/nova/Kconfig
++++ b/drivers/gpu/drm/nova/Kconfig
+@@ -1,5 +1,6 @@
+ config DRM_NOVA
+ tristate "Nova DRM driver"
++ depends on 64BIT
+ depends on DRM=y
+ depends on PCI
+ depends on RUST
--- /dev/null
+From stable+bounces-204314-greg=kroah.com@vger.kernel.org Wed Dec 31 04:16:32 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Dec 2025 22:16:26 -0500
+Subject: KVM: s390: Fix gmap_helper_zap_one_page() again
+To: stable@vger.kernel.org
+Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>, Marc Hartmayer <mhartmay@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Heiko Carstens <hca@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251231031626.2684565-1-sashal@kernel.org>
+
+From: Claudio Imbrenda <imbrenda@linux.ibm.com>
+
+[ Upstream commit 2f393c228cc519ddf19b8c6c05bf15723241aa96 ]
+
+A few checks were missing in gmap_helper_zap_one_page(), which can lead
+to memory corruption in the guest under specific circumstances.
+
+Add the missing checks.
+
+Fixes: 5deafa27d9ae ("KVM: s390: Fix to clear PTE when discarding a swapped page")
+Cc: stable@vger.kernel.org
+Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Tested-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ adapted ptep_zap_softleaf_entry() and softleaf_from_pte() calls to ptep_zap_swap_entry() and pte_to_swp_entry() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/gmap_helpers.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/mm/gmap_helpers.c
++++ b/arch/s390/mm/gmap_helpers.c
+@@ -47,6 +47,7 @@ static void ptep_zap_swap_entry(struct m
+ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+ {
+ struct vm_area_struct *vma;
++ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+@@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_
+ if (pte_swap(*ptep)) {
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
++ pgstev = pgste_val(pgste);
+
+- ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+- pte_clear(mm, vmaddr, ptep);
++ if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
++ (pgstev & _PGSTE_GPS_ZERO)) {
++ ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
++ pte_clear(mm, vmaddr, ptep);
++ }
+
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
--- /dev/null
+From stable+bounces-204176-greg=kroah.com@vger.kernel.org Tue Dec 30 03:48:38 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 21:48:31 -0500
+Subject: mm/huge_memory: merge uniform_split_supported() and non_uniform_split_supported()
+To: stable@vger.kernel.org
+Cc: Wei Yang <richard.weiyang@gmail.com>, Zi Yan <ziy@nvidia.com>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, Dev Jain <dev.jain@arm.com>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Andrew Morton <akpm@linux-foundation.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251230024831.1972219-1-sashal@kernel.org>
+
+From: Wei Yang <richard.weiyang@gmail.com>
+
+[ Upstream commit 8a0e4bdddd1c998b894d879a1d22f1e745606215 ]
+
+uniform_split_supported() and non_uniform_split_supported() share
+significantly similar logic.
+
+The only functional difference is that uniform_split_supported() includes
+an additional check on the requested @new_order.
+
+The reason for this check comes from the following two aspects:
+
+ * some file systems and the swap cache only support order-0 folios
+ * the behavioral difference between uniform/non-uniform split
+
+The behavioral difference between uniform split and non-uniform:
+
+ * uniform split splits folio directly to @new_order
+ * non-uniform split creates after-split folios with orders from
+ folio_order(folio) - 1 to new_order.
+
+This means that for a non-uniform split, or a uniform split to a
+non-zero @new_order, we should check the file system and swap cache
+support respectively.
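+
+As a summary sketch of the merged condition (this is what the combined
+helper below implements):
+
+	/* restrictions only apply when an after-split folio can be large */
+	if (!uniform_split || new_order) {
+		/* file folios need mapping_large_folio_support(),
+		 * and swapcache folios cannot be split at all here
+		 */
+	}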
+
+This commit unifies the logic and merges the two functions into a single
+combined helper, removing redundant code and simplifying the split
+support checking mechanism.
+
+Link: https://lkml.kernel.org/r/20251106034155.21398-3-richard.weiyang@gmail.com
+Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
+Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: "David Hildenbrand (Red Hat)" <david@kernel.org>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ split_type => uniform_split and replaced SPLIT_TYPE_NON_UNIFORM checks ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/huge_mm.h | 8 ++---
+ mm/huge_memory.c | 71 ++++++++++++++++++++----------------------------
+ 2 files changed, 33 insertions(+), 46 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -369,10 +369,8 @@ int split_huge_page_to_list_to_order(str
+ unsigned int new_order);
+ int min_order_for_split(struct folio *folio);
+ int split_folio_to_list(struct folio *folio, struct list_head *list);
+-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+- bool warns);
+-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+- bool warns);
++bool folio_split_supported(struct folio *folio, unsigned int new_order,
++ bool uniform_split, bool warns);
+ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
+ struct list_head *list);
+ /*
+@@ -392,7 +390,7 @@ int folio_split(struct folio *folio, uns
+ static inline int try_folio_split_to_order(struct folio *folio,
+ struct page *page, unsigned int new_order)
+ {
+- if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
++ if (!folio_split_supported(folio, new_order, false, /* warns= */ false))
+ return split_huge_page_to_list_to_order(&folio->page, NULL,
+ new_order);
+ return folio_split(folio, new_order, page, NULL);
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -3515,8 +3515,8 @@ static int __split_unmapped_folio(struct
+ return ret;
+ }
+
+-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+- bool warns)
++bool folio_split_supported(struct folio *folio, unsigned int new_order,
++ bool uniform_split, bool warns)
+ {
+ if (folio_test_anon(folio)) {
+ /* order-1 is not supported for anonymous THP. */
+@@ -3524,48 +3524,41 @@ bool non_uniform_split_supported(struct
+ "Cannot split to order-1 folio");
+ if (new_order == 1)
+ return false;
+- } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+- !mapping_large_folio_support(folio->mapping)) {
+- /*
+- * No split if the file system does not support large folio.
+- * Note that we might still have THPs in such mappings due to
+- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+- * does not actually support large folios properly.
+- */
+- VM_WARN_ONCE(warns,
+- "Cannot split file folio to non-0 order");
+- return false;
+- }
+-
+- /* Only swapping a whole PMD-mapped folio is supported */
+- if (folio_test_swapcache(folio)) {
+- VM_WARN_ONCE(warns,
+- "Cannot split swapcache folio to non-0 order");
+- return false;
+- }
+-
+- return true;
+-}
+-
+-/* See comments in non_uniform_split_supported() */
+-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+- bool warns)
+-{
+- if (folio_test_anon(folio)) {
+- VM_WARN_ONCE(warns && new_order == 1,
+- "Cannot split to order-1 folio");
+- if (new_order == 1)
+- return false;
+- } else if (new_order) {
++ } else if (!uniform_split || new_order) {
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
++ /*
++ * We can always split a folio down to a single page
++ * (new_order == 0) uniformly.
++ *
++ * For any other scenario
++ * a) uniform split targeting a large folio
++ * (new_order > 0)
++ * b) any non-uniform split
++ * we must confirm that the file system supports large
++ * folios.
++ *
++ * Note that we might still have THPs in such
++ * mappings, which is created from khugepaged when
++ * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that
++ * case, the mapping does not actually support large
++ * folios properly.
++ */
+ VM_WARN_ONCE(warns,
+ "Cannot split file folio to non-0 order");
+ return false;
+ }
+ }
+
+- if (new_order && folio_test_swapcache(folio)) {
++ /*
++ * swapcache folio could only be split to order 0
++ *
++ * non-uniform split creates after-split folios with orders from
++ * folio_order(folio) - 1 to new_order, making it not suitable for any
++ * swapcache folio split. Only uniform split to order-0 can be used
++ * here.
++ */
++ if ((!uniform_split || new_order) && folio_test_swapcache(folio)) {
+ VM_WARN_ONCE(warns,
+ "Cannot split swapcache folio to non-0 order");
+ return false;
+@@ -3632,11 +3625,7 @@ static int __folio_split(struct folio *f
+ if (new_order >= folio_order(folio))
+ return -EINVAL;
+
+- if (uniform_split && !uniform_split_supported(folio, new_order, true))
+- return -EINVAL;
+-
+- if (!uniform_split &&
+- !non_uniform_split_supported(folio, new_order, true))
++ if (!folio_split_supported(folio, new_order, uniform_split, /* warn = */ true))
+ return -EINVAL;
+
+ is_hzp = is_huge_zero_folio(folio);
--- /dev/null
+From stable+bounces-204125-greg=kroah.com@vger.kernel.org Mon Dec 29 20:35:44 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:35:37 -0500
+Subject: sched/core: Add comment explaining force-idle vruntime snapshots
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193539.1640748-1-sashal@kernel.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 9359d9785d85bb53f1ff1738a59aeeec4b878906 ]
+
+I always end up having to re-read these emails every time I look at
+this code. And a future patch is going to change this story a little.
+This means it is past time to stick them in a comment so it can be
+modified and stay current.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20200506143506.GH5298@hirez.programming.kicks-ass.net
+Link: https://lkml.kernel.org/r/20200515103844.GG2978@hirez.programming.kicks-ass.net
+Link: https://patch.msgid.link/20251106111603.GB4068168@noisy.programming.kicks-ass.net
+Stable-dep-of: 79f3f9bedd14 ("sched/eevdf: Fix min_vruntime vs avg_vruntime")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 181 insertions(+)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -13014,6 +13014,187 @@ static inline void task_tick_core(struct
+ }
+
+ /*
++ * Consider any infeasible weight scenario. Take for instance two tasks,
++ * each bound to their respective sibling, one with weight 1 and one with
++ * weight 2. Then the lower weight task will run ahead of the higher weight
++ * task without bound.
++ *
++ * This utterly destroys the concept of a shared time base.
++ *
++ * Remember; all this is about a proportionally fair scheduling, where each
++ * tasks receives:
++ *
++ * w_i
++ * dt_i = ---------- dt (1)
++ * \Sum_j w_j
++ *
++ * which we do by tracking a virtual time, s_i:
++ *
++ * 1
++ * s_i = --- d[t]_i (2)
++ * w_i
++ *
++ * Where d[t] is a delta of discrete time, while dt is an infinitesimal.
++ * The immediate corollary is that the ideal schedule S, where (2) to use
++ * an infinitesimal delta, is:
++ *
++ * 1
++ * S = ---------- dt (3)
++ * \Sum_i w_i
++ *
++ * From which we can define the lag, or deviation from the ideal, as:
++ *
++ * lag(i) = S - s_i (4)
++ *
++ * And since the one and only purpose is to approximate S, we get that:
++ *
++ * \Sum_i w_i lag(i) := 0 (5)
++ *
++ * If this were not so, we no longer converge to S, and we can no longer
++ * claim our scheduler has any of the properties we derive from S. This is
++ * exactly what you did above, you broke it!
++ *
++ *
++ * Let's continue for a while though; to see if there is anything useful to
++ * be learned. We can combine (1)-(3) or (4)-(5) and express S in s_i:
++ *
++ * \Sum_i w_i s_i
++ * S = -------------- (6)
++ * \Sum_i w_i
++ *
++ * Which gives us a way to compute S, given our s_i. Now, if you've read
++ * our code, you know that we do not in fact do this, the reason for this
++ * is two-fold. Firstly, computing S in that way requires a 64bit division
++ * for every time we'd use it (see 12), and secondly, this only describes
++ * the steady-state, it doesn't handle dynamics.
++ *
++ * Anyway, in (6): s_i -> x + (s_i - x), to get:
++ *
++ * \Sum_i w_i (s_i - x)
++ * S - x = -------------------- (7)
++ * \Sum_i w_i
++ *
++ * Which shows that S and s_i transform alike (which makes perfect sense
++ * given that S is basically the (weighted) average of s_i).
++ *
++ * Then:
++ *
++ * x -> s_min := min{s_i} (8)
++ *
++ * to obtain:
++ *
++ * \Sum_i w_i (s_i - s_min)
++ * S = s_min + ------------------------ (9)
++ * \Sum_i w_i
++ *
++ * Which already looks familiar, and is the basis for our current
++ * approximation:
++ *
++ * S ~= s_min (10)
++ *
++ * Now, obviously, (10) is absolute crap :-), but it sorta works.
++ *
++ * So the thing to remember is that the above is strictly UP. It is
++ * possible to generalize to multiple runqueues -- however it gets really
++ * yuck when you have to add affinity support, as illustrated by our very
++ * first counter-example.
++ *
++ * Luckily I think we can avoid needing a full multi-queue variant for
++ * core-scheduling (or load-balancing). The crucial observation is that we
++ * only actually need this comparison in the presence of forced-idle; only
++ * then do we need to tell if the stalled rq has higher priority over the
++ * other.
++ *
++ * [XXX assumes SMT2; better consider the more general case, I suspect
++ * it'll work out because our comparison is always between 2 rqs and the
++ * answer is only interesting if one of them is forced-idle]
++ *
++ * And (under assumption of SMT2) when there is forced-idle, there is only
++ * a single queue, so everything works like normal.
++ *
++ * Let, for our runqueue 'k':
++ *
++ * T_k = \Sum_i w_i s_i
++ * W_k = \Sum_i w_i ; for all i of k (11)
++ *
++ * Then we can write (6) like:
++ *
++ * T_k
++ * S_k = --- (12)
++ * W_k
++ *
++ * From which immediately follows that:
++ *
++ * T_k + T_l
++ * S_k+l = --------- (13)
++ * W_k + W_l
++ *
++ * On which we can define a combined lag:
++ *
++ * lag_k+l(i) := S_k+l - s_i (14)
++ *
++ * And that gives us the tools to compare tasks across a combined runqueue.
++ *
++ *
++ * Combined this gives the following:
++ *
++ * a) when a runqueue enters force-idle, sync it against it's sibling rq(s)
++ * using (7); this only requires storing single 'time'-stamps.
++ *
++ * b) when comparing tasks between 2 runqueues of which one is forced-idle,
++ * compare the combined lag, per (14).
++ *
++ * Now, of course cgroups (I so hate them) make this more interesting in
++ * that a) seems to suggest we need to iterate all cgroup on a CPU at such
++ * boundaries, but I think we can avoid that. The force-idle is for the
++ * whole CPU, all it's rqs. So we can mark it in the root and lazily
++ * propagate downward on demand.
++ */
++
++/*
++ * So this sync is basically a relative reset of S to 0.
++ *
++ * So with 2 queues, when one goes idle, we drop them both to 0 and one
++ * then increases due to not being idle, and the idle one builds up lag to
++ * get re-elected. So far so simple, right?
++ *
++ * When there's 3, we can have the situation where 2 run and one is idle,
++ * we sync to 0 and let the idle one build up lag to get re-election. Now
++ * suppose another one also drops idle. At this point dropping all to 0
++ * again would destroy the built-up lag from the queue that was already
++ * idle, not good.
++ *
++ * So instead of syncing everything, we can:
++ *
++ * less := !((s64)(s_a - s_b) <= 0)
++ *
++ * (v_a - S_a) - (v_b - S_b) == v_a - v_b - S_a + S_b
++ * == v_a - (v_b - S_a + S_b)
++ *
++ * IOW, we can recast the (lag) comparison to a one-sided difference.
++ * So if then, instead of syncing the whole queue, sync the idle queue
++ * against the active queue with S_a + S_b at the point where we sync.
++ *
++ * (XXX consider the implication of living in a cyclic group: N / 2^n N)
++ *
++ * This gives us means of syncing single queues against the active queue,
++ * and for already idle queues to preserve their build-up lag.
++ *
++ * Of course, then we get the situation where there's 2 active and one
++ * going idle, who do we pick to sync against? Theory would have us sync
++ * against the combined S, but as we've already demonstrated, there is no
++ * such thing in infeasible weight scenarios.
++ *
++ * One thing I've considered; and this is where that core_active rudiment
++ * came from, is having active queues sync up between themselves after
++ * every tick. This limits the observed divergence due to the work
++ * conservancy.
++ *
++ * On top of that, we can improve upon things by moving away from our
++ * horrible (10) hack and moving to (9) and employing (13) here.
++ */
++
++/*
+ * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+ */
+ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
--- /dev/null
+From stable+bounces-204126-greg=kroah.com@vger.kernel.org Mon Dec 29 20:35:47 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:35:38 -0500
+Subject: sched/eevdf: Fix min_vruntime vs avg_vruntime
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Zicheng Qu <quzicheng@huawei.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193539.1640748-2-sashal@kernel.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 79f3f9bedd149ea438aaeb0fb6a083637affe205 ]
+
+Basically, from the constraint that the sum of lag is zero, you can
+infer that the 0-lag point is the weighted average of the individual
+vruntime, which is what we're trying to compute:
+
+ \Sum w_i * v_i
+ avg = --------------
+ \Sum w_i
+
+Now, since vruntime takes the whole u64 (worse, it wraps), this
+multiplication term in the numerator is not something we can compute;
+instead we do the min_vruntime (v0 henceforth) thing like:
+
+ v_i = (v_i - v0) + v0
+
+This does two things:
+ - it keeps the key: (v_i - v0) 'small';
+ - it creates a relative 0-point in the modular space.
+
+If you do that substitution and work it all out, you end up with:
+
+ \Sum w_i * (v_i - v0)
+ avg = --------------------- + v0
+ \Sum w_i
+
+Since you cannot very well track a ratio like that (and not suffer
+terrible numerical problems) we simply track the numerator and
+denominator individually and only perform the division when strictly
+needed.
+
+Notably, the numerator lives in cfs_rq->avg_vruntime and the denominator
+lives in cfs_rq->avg_load.
+
+The one extra 'funny' is that these numbers track the entities in the
+tree, and current is typically outside of the tree, so avg_vruntime()
+adds current when needed before doing the division.
+
+(vruntime_eligible() elides the division by cross-wise multiplication)
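+
+Condensed, the computation reads as follows (a sketch mirroring
+avg_vruntime() in the hunks below, rounding details omitted;
+entity_key() is (v_i - v0)):
+
+	s64 avg = cfs_rq->avg_vruntime;		/* \Sum w_i * (v_i - v0) */
+	long load = cfs_rq->avg_load;		/* \Sum w_i */
+
+	if (curr && curr->on_rq) {		/* current sits outside the tree */
+		unsigned long weight = scale_load_down(curr->load.weight);
+
+		avg += entity_key(cfs_rq, curr) * weight;
+		load += weight;
+	}
+
+	if (load)
+		avg = div_s64(avg, load);
+
+	return cfs_rq->zero_vruntime + avg;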
+
+Anyway, as mentioned above, we currently use the CFS era min_vruntime
+for this purpose. However, this thing can only move forward, while the
+above avg can in fact move backward (when a non-eligible task leaves,
+the average becomes smaller). This can cause trouble when, through
+happenstance (or construction), these values drift far enough apart to
+wreck the game.
+
+Replace cfs_rq::min_vruntime with cfs_rq::zero_vruntime which is kept
+near/at avg_vruntime, following its motion.
+
+The down-side is that this requires computing the avg more often.
+
+Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
+Reported-by: Zicheng Qu <quzicheng@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://patch.msgid.link/20251106111741.GC4068168@noisy.programming.kicks-ass.net
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/debug.c | 8 +--
+ kernel/sched/fair.c | 114 +++++++++++----------------------------------------
+ kernel/sched/sched.h | 4 -
+ 3 files changed, 31 insertions(+), 95 deletions(-)
+
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -796,7 +796,7 @@ static void print_rq(struct seq_file *m,
+
+ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+ {
+- s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
++ s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread;
+ struct sched_entity *last, *first, *root;
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+@@ -819,15 +819,15 @@ void print_cfs_rq(struct seq_file *m, in
+ last = __pick_last_entity(cfs_rq);
+ if (last)
+ right_vruntime = last->vruntime;
+- min_vruntime = cfs_rq->min_vruntime;
++ zero_vruntime = cfs_rq->zero_vruntime;
+ raw_spin_rq_unlock_irqrestore(rq, flags);
+
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_deadline",
+ SPLIT_NS(left_deadline));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_vruntime",
+ SPLIT_NS(left_vruntime));
+- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
+- SPLIT_NS(min_vruntime));
++ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime",
++ SPLIT_NS(zero_vruntime));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime",
+ SPLIT_NS(avg_vruntime(cfs_rq)));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime",
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -554,7 +554,7 @@ static inline bool entity_before(const s
+
+ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+- return (s64)(se->vruntime - cfs_rq->min_vruntime);
++ return (s64)(se->vruntime - cfs_rq->zero_vruntime);
+ }
+
+ #define __node_2_se(node) \
+@@ -606,13 +606,13 @@ static inline s64 entity_key(struct cfs_
+ *
+ * Which we track using:
+ *
+- * v0 := cfs_rq->min_vruntime
++ * v0 := cfs_rq->zero_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ * \Sum w_i := cfs_rq->avg_load
+ *
+- * Since min_vruntime is a monotonic increasing variable that closely tracks
+- * the per-task service, these deltas: (v_i - v), will be in the order of the
+- * maximal (virtual) lag induced in the system due to quantisation.
++ * Since zero_vruntime closely tracks the per-task service, these
++ * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
++ * induced in the system due to quantisation.
+ *
+ * Also, we use scale_load_down() to reduce the size.
+ *
+@@ -671,7 +671,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
+ avg = div_s64(avg, load);
+ }
+
+- return cfs_rq->min_vruntime + avg;
++ return cfs_rq->zero_vruntime + avg;
+ }
+
+ /*
+@@ -732,7 +732,7 @@ static int vruntime_eligible(struct cfs_
+ load += weight;
+ }
+
+- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
++ return avg >= (s64)(vruntime - cfs_rq->zero_vruntime) * load;
+ }
+
+ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -740,42 +740,14 @@ int entity_eligible(struct cfs_rq *cfs_r
+ return vruntime_eligible(cfs_rq, se->vruntime);
+ }
+
+-static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
++static void update_zero_vruntime(struct cfs_rq *cfs_rq)
+ {
+- u64 min_vruntime = cfs_rq->min_vruntime;
+- /*
+- * open coded max_vruntime() to allow updating avg_vruntime
+- */
+- s64 delta = (s64)(vruntime - min_vruntime);
+- if (delta > 0) {
+- avg_vruntime_update(cfs_rq, delta);
+- min_vruntime = vruntime;
+- }
+- return min_vruntime;
+-}
+-
+-static void update_min_vruntime(struct cfs_rq *cfs_rq)
+-{
+- struct sched_entity *se = __pick_root_entity(cfs_rq);
+- struct sched_entity *curr = cfs_rq->curr;
+- u64 vruntime = cfs_rq->min_vruntime;
++ u64 vruntime = avg_vruntime(cfs_rq);
++ s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
+
+- if (curr) {
+- if (curr->on_rq)
+- vruntime = curr->vruntime;
+- else
+- curr = NULL;
+- }
+-
+- if (se) {
+- if (!curr)
+- vruntime = se->min_vruntime;
+- else
+- vruntime = min_vruntime(vruntime, se->min_vruntime);
+- }
++ avg_vruntime_update(cfs_rq, delta);
+
+- /* ensure we never gain time by being placed backwards. */
+- cfs_rq->min_vruntime = __update_min_vruntime(cfs_rq, vruntime);
++ cfs_rq->zero_vruntime = vruntime;
+ }
+
+ static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
+@@ -848,6 +820,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntim
+ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ avg_vruntime_add(cfs_rq, se);
++ update_zero_vruntime(cfs_rq);
+ se->min_vruntime = se->vruntime;
+ se->min_slice = se->slice;
+ rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+@@ -859,6 +832,7 @@ static void __dequeue_entity(struct cfs_
+ rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+ &min_vruntime_cb);
+ avg_vruntime_sub(cfs_rq, se);
++ update_zero_vruntime(cfs_rq);
+ }
+
+ struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
+@@ -1226,7 +1200,6 @@ static void update_curr(struct cfs_rq *c
+
+ curr->vruntime += calc_delta_fair(delta_exec, curr);
+ resched = update_deadline(cfs_rq, curr);
+- update_min_vruntime(cfs_rq);
+
+ if (entity_is_task(curr)) {
+ /*
+@@ -3808,15 +3781,6 @@ static void reweight_entity(struct cfs_r
+ if (!curr)
+ __enqueue_entity(cfs_rq, se);
+ cfs_rq->nr_queued++;
+-
+- /*
+- * The entity's vruntime has been adjusted, so let's check
+- * whether the rq-wide min_vruntime needs updated too. Since
+- * the calculations above require stable min_vruntime rather
+- * than up-to-date one, we do the update at the end of the
+- * reweight process.
+- */
+- update_min_vruntime(cfs_rq);
+ }
+ }
+
+@@ -5432,15 +5396,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
+
+ update_cfs_group(se);
+
+- /*
+- * Now advance min_vruntime if @se was the entity holding it back,
+- * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+- * put back on, and if we advance min_vruntime, we'll be placed back
+- * further than we started -- i.e. we'll be penalized.
+- */
+- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
+- update_min_vruntime(cfs_rq);
+-
+ if (flags & DEQUEUE_DELAYED)
+ finish_delayed_dequeue_entity(se);
+
+@@ -9028,7 +8983,6 @@ static void yield_task_fair(struct rq *r
+ if (entity_eligible(cfs_rq, se)) {
+ se->vruntime = se->deadline;
+ se->deadline += calc_delta_fair(se->slice, se);
+- update_min_vruntime(cfs_rq);
+ }
+ }
+
+@@ -13077,23 +13031,6 @@ static inline void task_tick_core(struct
+ * Which shows that S and s_i transform alike (which makes perfect sense
+ * given that S is basically the (weighted) average of s_i).
+ *
+- * Then:
+- *
+- * x -> s_min := min{s_i} (8)
+- *
+- * to obtain:
+- *
+- * \Sum_i w_i (s_i - s_min)
+- * S = s_min + ------------------------ (9)
+- * \Sum_i w_i
+- *
+- * Which already looks familiar, and is the basis for our current
+- * approximation:
+- *
+- * S ~= s_min (10)
+- *
+- * Now, obviously, (10) is absolute crap :-), but it sorta works.
+- *
+ * So the thing to remember is that the above is strictly UP. It is
+ * possible to generalize to multiple runqueues -- however it gets really
+ * yuck when you have to add affinity support, as illustrated by our very
+@@ -13115,23 +13052,23 @@ static inline void task_tick_core(struct
+ * Let, for our runqueue 'k':
+ *
+ * T_k = \Sum_i w_i s_i
+- * W_k = \Sum_i w_i ; for all i of k (11)
++ * W_k = \Sum_i w_i ; for all i of k (8)
+ *
+ * Then we can write (6) like:
+ *
+ * T_k
+- * S_k = --- (12)
++ * S_k = --- (9)
+ * W_k
+ *
+ * From which immediately follows that:
+ *
+ * T_k + T_l
+- * S_k+l = --------- (13)
++ * S_k+l = --------- (10)
+ * W_k + W_l
+ *
+ * On which we can define a combined lag:
+ *
+- * lag_k+l(i) := S_k+l - s_i (14)
++ * lag_k+l(i) := S_k+l - s_i (11)
+ *
+ * And that gives us the tools to compare tasks across a combined runqueue.
+ *
+@@ -13142,7 +13079,7 @@ static inline void task_tick_core(struct
+ * using (7); this only requires storing single 'time'-stamps.
+ *
+ * b) when comparing tasks between 2 runqueues of which one is forced-idle,
+- * compare the combined lag, per (14).
++ * compare the combined lag, per (11).
+ *
+ * Now, of course cgroups (I so hate them) make this more interesting in
+ * that a) seems to suggest we need to iterate all cgroup on a CPU at such
+@@ -13190,12 +13127,11 @@ static inline void task_tick_core(struct
+ * every tick. This limits the observed divergence due to the work
+ * conservancy.
+ *
+- * On top of that, we can improve upon things by moving away from our
+- * horrible (10) hack and moving to (9) and employing (13) here.
++ * On top of that, we can improve upon things by employing (10) here.
+ */
+
+ /*
+- * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
++ * se_fi_update - Update the cfs_rq->zero_vruntime_fi in a CFS hierarchy if needed.
+ */
+ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
+ bool forceidle)
+@@ -13209,7 +13145,7 @@ static void se_fi_update(const struct sc
+ cfs_rq->forceidle_seq = fi_seq;
+ }
+
+- cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
++ cfs_rq->zero_vruntime_fi = cfs_rq->zero_vruntime;
+ }
+ }
+
+@@ -13262,11 +13198,11 @@ bool cfs_prio_less(const struct task_str
+
+ /*
+ * Find delta after normalizing se's vruntime with its cfs_rq's
+- * min_vruntime_fi, which would have been updated in prior calls
++ * zero_vruntime_fi, which would have been updated in prior calls
+ * to se_fi_update().
+ */
+ delta = (s64)(sea->vruntime - seb->vruntime) +
+- (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
++ (s64)(cfs_rqb->zero_vruntime_fi - cfs_rqa->zero_vruntime_fi);
+
+ return delta > 0;
+ }
+@@ -13502,7 +13438,7 @@ static void set_next_task_fair(struct rq
+ void init_cfs_rq(struct cfs_rq *cfs_rq)
+ {
+ cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+- cfs_rq->min_vruntime = (u64)(-(1LL << 20));
++ cfs_rq->zero_vruntime = (u64)(-(1LL << 20));
+ raw_spin_lock_init(&cfs_rq->removed.lock);
+ }
+
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -682,10 +682,10 @@ struct cfs_rq {
+ s64 avg_vruntime;
+ u64 avg_load;
+
+- u64 min_vruntime;
++ u64 zero_vruntime;
+ #ifdef CONFIG_SCHED_CORE
+ unsigned int forceidle_seq;
+- u64 min_vruntime_fi;
++ u64 zero_vruntime_fi;
+ #endif
+
+ struct rb_root_cached tasks_timeline;
--- /dev/null
+From 127b90315ca07ccad2618db7ba950a63e3b32d22 Mon Sep 17 00:00:00 2001
+From: Fernand Sieber <sieberf@amazon.com>
+Date: Thu, 6 Nov 2025 12:40:10 +0200
+Subject: sched/proxy: Yield the donor task
+
+From: Fernand Sieber <sieberf@amazon.com>
+
+commit 127b90315ca07ccad2618db7ba950a63e3b32d22 upstream.
+
+When executing a task in proxy context, handle yields as if they were
+requested by the donor task. This matches the traditional PI semantics
+of yield() as well.
+
+This avoids scenarios like the proxy task yielding, the next pick
+selecting the same still-blocked donor, the proxy task running again,
+and so on.
+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202510211205.1e0f5223-lkp@intel.com
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Fernand Sieber <sieberf@amazon.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://patch.msgid.link/20251106104022.195157-1-sieberf@amazon.com
+Cc: Holger Hoffstätte <holger@applied-asynchrony.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/deadline.c | 2 +-
+ kernel/sched/ext.c | 4 ++--
+ kernel/sched/fair.c | 2 +-
+ kernel/sched/rt.c | 2 +-
+ kernel/sched/syscalls.c | 5 +++--
+ 5 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -2143,7 +2143,7 @@ static void yield_task_dl(struct rq *rq)
+ * it and the bandwidth timer will wake it up and will give it
+ * new scheduling parameters (thanks to dl_yielded=1).
+ */
+- rq->curr->dl.dl_yielded = 1;
++ rq->donor->dl.dl_yielded = 1;
+
+ update_rq_clock(rq);
+ update_curr_dl(rq);
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -1493,7 +1493,7 @@ static bool dequeue_task_scx(struct rq *
+ static void yield_task_scx(struct rq *rq)
+ {
+ struct scx_sched *sch = scx_root;
+- struct task_struct *p = rq->curr;
++ struct task_struct *p = rq->donor;
+
+ if (SCX_HAS_OP(sch, yield))
+ SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
+@@ -1504,7 +1504,7 @@ static void yield_task_scx(struct rq *rq
+ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
+ {
+ struct scx_sched *sch = scx_root;
+- struct task_struct *from = rq->curr;
++ struct task_struct *from = rq->donor;
+
+ if (SCX_HAS_OP(sch, yield))
+ return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8993,7 +8993,7 @@ static void put_prev_task_fair(struct rq
+ */
+ static void yield_task_fair(struct rq *rq)
+ {
+- struct task_struct *curr = rq->curr;
++ struct task_struct *curr = rq->donor;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ struct sched_entity *se = &curr->se;
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -1490,7 +1490,7 @@ static void requeue_task_rt(struct rq *r
+
+ static void yield_task_rt(struct rq *rq)
+ {
+- requeue_task_rt(rq, rq->curr, 0);
++ requeue_task_rt(rq, rq->donor, 0);
+ }
+
+ static int find_lowest_rq(struct task_struct *task);
+--- a/kernel/sched/syscalls.c
++++ b/kernel/sched/syscalls.c
+@@ -1351,7 +1351,7 @@ static void do_sched_yield(void)
+ rq = this_rq_lock_irq(&rf);
+
+ schedstat_inc(rq->yld_count);
+- current->sched_class->yield_task(rq);
++ rq->donor->sched_class->yield_task(rq);
+
+ preempt_disable();
+ rq_unlock_irq(rq, &rf);
+@@ -1420,12 +1420,13 @@ EXPORT_SYMBOL(yield);
+ */
+ int __sched yield_to(struct task_struct *p, bool preempt)
+ {
+- struct task_struct *curr = current;
++ struct task_struct *curr;
+ struct rq *rq, *p_rq;
+ int yielded = 0;
+
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+ rq = this_rq();
++ curr = rq->donor;
+
+ again:
+ p_rq = task_rq(p);
--- /dev/null
+From stable+bounces-204127-greg=kroah.com@vger.kernel.org Mon Dec 29 20:36:45 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:36:40 -0500
+Subject: sched_ext: Fix incorrect sched_class settings for per-cpu migration tasks
+To: stable@vger.kernel.org
+Cc: Zqiang <qiang.zhang@linux.dev>, Andrea Righi <arighi@nvidia.com>, Tejun Heo <tj@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193640.1641653-1-sashal@kernel.org>
+
+From: Zqiang <qiang.zhang@linux.dev>
+
+[ Upstream commit 1dd6c84f1c544e552848a8968599220bd464e338 ]
+
+When loading the eBPF scheduler, the tasks in the scx_tasks list are
+traversed and __setscheduler_class() is invoked to get the new
+sched_class. However, this also incorrectly sets the per-CPU migration
+tasks' ->sched_class to rt_sched_class, and even after unload the
+per-CPU migration tasks' ->sched_class remains rt_sched_class.
+
+The log for this issue is as follows:
+
+./scx_rustland --stats 1
+[ 199.245639][ T630] sched_ext: "rustland" does not implement cgroup cpu.weight
+[ 199.269213][ T630] sched_ext: BPF scheduler "rustland" enabled
+04:25:09 [INFO] RustLand scheduler attached
+
+bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
+{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
+Attaching 1 probe...
+migration/0:24->rt_sched_class+0x0/0xe0
+migration/1:27->rt_sched_class+0x0/0xe0
+migration/2:33->rt_sched_class+0x0/0xe0
+migration/3:39->rt_sched_class+0x0/0xe0
+migration/4:45->rt_sched_class+0x0/0xe0
+migration/5:52->rt_sched_class+0x0/0xe0
+migration/6:58->rt_sched_class+0x0/0xe0
+migration/7:64->rt_sched_class+0x0/0xe0
+
+sched_ext: BPF scheduler "rustland" disabled (unregistered from user space)
+EXIT: unregistered from user space
+04:25:21 [INFO] Unregister RustLand scheduler
+
+bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
+{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
+Attaching 1 probe...
+migration/0:24->rt_sched_class+0x0/0xe0
+migration/1:27->rt_sched_class+0x0/0xe0
+migration/2:33->rt_sched_class+0x0/0xe0
+migration/3:39->rt_sched_class+0x0/0xe0
+migration/4:45->rt_sched_class+0x0/0xe0
+migration/5:52->rt_sched_class+0x0/0xe0
+migration/6:58->rt_sched_class+0x0/0xe0
+migration/7:64->rt_sched_class+0x0/0xe0
+
+This commit therefore introduces a new scx_setscheduler_class() helper
+that checks for stop_sched_class, and uses it to replace
+__setscheduler_class().
+
+Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class")
+Cc: stable@vger.kernel.org # v6.12+
+Signed-off-by: Zqiang <qiang.zhang@linux.dev>
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -210,6 +210,14 @@ static struct scx_dispatch_q *find_user_
+ return rhashtable_lookup_fast(&sch->dsq_hash, &dsq_id, dsq_hash_params);
+ }
+
++static const struct sched_class *scx_setscheduler_class(struct task_struct *p)
++{
++ if (p->sched_class == &stop_sched_class)
++ return &stop_sched_class;
++
++ return __setscheduler_class(p->policy, p->prio);
++}
++
+ /*
+ * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
+ * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
+@@ -3994,8 +4002,7 @@ static void scx_disable_workfn(struct kt
+ scx_task_iter_start(&sti);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ const struct sched_class *old_class = p->sched_class;
+- const struct sched_class *new_class =
+- __setscheduler_class(p->policy, p->prio);
++ const struct sched_class *new_class = scx_setscheduler_class(p);
+ struct sched_enq_and_set_ctx ctx;
+
+ if (old_class != new_class && p->se.sched_delayed)
+@@ -4779,8 +4786,7 @@ static int scx_enable(struct sched_ext_o
+ scx_task_iter_start(&sti);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ const struct sched_class *old_class = p->sched_class;
+- const struct sched_class *new_class =
+- __setscheduler_class(p->policy, p->prio);
++ const struct sched_class *new_class = scx_setscheduler_class(p);
+ struct sched_enq_and_set_ctx ctx;
+
+ if (!tryget_task_struct(p))
--- /dev/null
+sched-proxy-yield-the-donor-task.patch
+drm-nova-depend-on-config_64bit.patch
+x86-microcode-amd-select-which-microcode-patch-to-load.patch
+sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch
+sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch
+sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch
+mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch
+kvm-s390-fix-gmap_helper_zap_one_page-again.patch
+drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch
+drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch
--- /dev/null
+From 8d171045069c804e5ffaa18be590c42c6af0cf3f Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Thu, 25 Sep 2025 13:46:00 +0200
+Subject: x86/microcode/AMD: Select which microcode patch to load
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 8d171045069c804e5ffaa18be590c42c6af0cf3f upstream.
+
+All microcode patches up to the proper BIOS Entrysign fix are loaded
+only after the sha256 signature carried in the driver has been verified.
+
+Microcode patches released after the Entrysign fix has been applied do
+not need that signature verification anymore.
+
+In order to not abandon machines which haven't received the BIOS update
+yet, add the capability to select which microcode patch to load.
+
+The corresponding microcode container supplied through firmware-linux
+has been modified to carry two patches per CPU type
+(family/model/stepping) so that the proper one gets selected.
+
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Waiman Long <longman@redhat.com>
+Link: https://patch.msgid.link/20251027133818.4363-1-bp@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/microcode/amd.c | 113 ++++++++++++++++++++++--------------
+ 1 file changed, 72 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -186,50 +186,61 @@ static u32 cpuid_to_ucode_rev(unsigned i
+ return p.ucode_rev;
+ }
+
++static u32 get_cutoff_revision(u32 rev)
++{
++ switch (rev >> 8) {
++ case 0x80012: return 0x8001277; break;
++ case 0x80082: return 0x800820f; break;
++ case 0x83010: return 0x830107c; break;
++ case 0x86001: return 0x860010e; break;
++ case 0x86081: return 0x8608108; break;
++ case 0x87010: return 0x8701034; break;
++ case 0x8a000: return 0x8a0000a; break;
++ case 0xa0010: return 0xa00107a; break;
++ case 0xa0011: return 0xa0011da; break;
++ case 0xa0012: return 0xa001243; break;
++ case 0xa0082: return 0xa00820e; break;
++ case 0xa1011: return 0xa101153; break;
++ case 0xa1012: return 0xa10124e; break;
++ case 0xa1081: return 0xa108109; break;
++ case 0xa2010: return 0xa20102f; break;
++ case 0xa2012: return 0xa201212; break;
++ case 0xa4041: return 0xa404109; break;
++ case 0xa5000: return 0xa500013; break;
++ case 0xa6012: return 0xa60120a; break;
++ case 0xa7041: return 0xa704109; break;
++ case 0xa7052: return 0xa705208; break;
++ case 0xa7080: return 0xa708009; break;
++ case 0xa70c0: return 0xa70C009; break;
++ case 0xaa001: return 0xaa00116; break;
++ case 0xaa002: return 0xaa00218; break;
++ case 0xb0021: return 0xb002146; break;
++ case 0xb0081: return 0xb008111; break;
++ case 0xb1010: return 0xb101046; break;
++ case 0xb2040: return 0xb204031; break;
++ case 0xb4040: return 0xb404031; break;
++ case 0xb4041: return 0xb404101; break;
++ case 0xb6000: return 0xb600031; break;
++ case 0xb6080: return 0xb608031; break;
++ case 0xb7000: return 0xb700031; break;
++ default: break;
++
++ }
++ return 0;
++}
++
+ static bool need_sha_check(u32 cur_rev)
+ {
++ u32 cutoff;
++
+ if (!cur_rev) {
+ cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax);
+ pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev);
+ }
+
+- switch (cur_rev >> 8) {
+- case 0x80012: return cur_rev <= 0x8001277; break;
+- case 0x80082: return cur_rev <= 0x800820f; break;
+- case 0x83010: return cur_rev <= 0x830107c; break;
+- case 0x86001: return cur_rev <= 0x860010e; break;
+- case 0x86081: return cur_rev <= 0x8608108; break;
+- case 0x87010: return cur_rev <= 0x8701034; break;
+- case 0x8a000: return cur_rev <= 0x8a0000a; break;
+- case 0xa0010: return cur_rev <= 0xa00107a; break;
+- case 0xa0011: return cur_rev <= 0xa0011da; break;
+- case 0xa0012: return cur_rev <= 0xa001243; break;
+- case 0xa0082: return cur_rev <= 0xa00820e; break;
+- case 0xa1011: return cur_rev <= 0xa101153; break;
+- case 0xa1012: return cur_rev <= 0xa10124e; break;
+- case 0xa1081: return cur_rev <= 0xa108109; break;
+- case 0xa2010: return cur_rev <= 0xa20102f; break;
+- case 0xa2012: return cur_rev <= 0xa201212; break;
+- case 0xa4041: return cur_rev <= 0xa404109; break;
+- case 0xa5000: return cur_rev <= 0xa500013; break;
+- case 0xa6012: return cur_rev <= 0xa60120a; break;
+- case 0xa7041: return cur_rev <= 0xa704109; break;
+- case 0xa7052: return cur_rev <= 0xa705208; break;
+- case 0xa7080: return cur_rev <= 0xa708009; break;
+- case 0xa70c0: return cur_rev <= 0xa70C009; break;
+- case 0xaa001: return cur_rev <= 0xaa00116; break;
+- case 0xaa002: return cur_rev <= 0xaa00218; break;
+- case 0xb0021: return cur_rev <= 0xb002146; break;
+- case 0xb0081: return cur_rev <= 0xb008111; break;
+- case 0xb1010: return cur_rev <= 0xb101046; break;
+- case 0xb2040: return cur_rev <= 0xb204031; break;
+- case 0xb4040: return cur_rev <= 0xb404031; break;
+- case 0xb4041: return cur_rev <= 0xb404101; break;
+- case 0xb6000: return cur_rev <= 0xb600031; break;
+- case 0xb6080: return cur_rev <= 0xb608031; break;
+- case 0xb7000: return cur_rev <= 0xb700031; break;
+- default: break;
+- }
++ cutoff = get_cutoff_revision(cur_rev);
++ if (cutoff)
++ return cur_rev <= cutoff;
+
+ pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n");
+ pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev);
+@@ -494,6 +505,7 @@ static int verify_patch(const u8 *buf, s
+ {
+ u8 family = x86_family(bsp_cpuid_1_eax);
+ struct microcode_header_amd *mc_hdr;
++ u32 cur_rev, cutoff, patch_rev;
+ u32 sh_psize;
+ u16 proc_id;
+ u8 patch_fam;
+@@ -533,11 +545,32 @@ static int verify_patch(const u8 *buf, s
+ proc_id = mc_hdr->processor_rev_id;
+ patch_fam = 0xf + (proc_id >> 12);
+
+- ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam);
+-
+ if (patch_fam != family)
+ return 1;
+
++ cur_rev = get_patch_level();
++
++ /* No cutoff revision means old/unaffected by signing algorithm weakness => matches */
++ cutoff = get_cutoff_revision(cur_rev);
++ if (!cutoff)
++ goto ok;
++
++ patch_rev = mc_hdr->patch_id;
++
++ ucode_dbg("cur_rev: 0x%x, cutoff: 0x%x, patch_rev: 0x%x\n",
++ cur_rev, cutoff, patch_rev);
++
++ if (cur_rev <= cutoff && patch_rev <= cutoff)
++ goto ok;
++
++ if (cur_rev > cutoff && patch_rev > cutoff)
++ goto ok;
++
++ return 1;
++
++ok:
++ ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam);
++
+ return 0;
+ }
+
+@@ -606,8 +639,6 @@ static size_t parse_container(u8 *ucode,
+
+ mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE);
+
+- ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id);
+-
+ if (mc_patch_matches(mc, eq_id)) {
+ desc->psize = patch_size;
+ desc->mc = mc;