6.18-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 2 Jan 2026 12:26:55 +0000 (13:26 +0100)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Fri, 2 Jan 2026 12:26:55 +0000 (13:26 +0100)
added patches:
drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch
drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch
drm-nova-depend-on-config_64bit.patch
kvm-s390-fix-gmap_helper_zap_one_page-again.patch
mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch
sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch
sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch
sched-proxy-yield-the-donor-task.patch
sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch
series
x86-microcode-amd-select-which-microcode-patch-to-load.patch

queue-6.18/drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch [new file with mode: 0644]
queue-6.18/drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch [new file with mode: 0644]
queue-6.18/drm-nova-depend-on-config_64bit.patch [new file with mode: 0644]
queue-6.18/kvm-s390-fix-gmap_helper_zap_one_page-again.patch [new file with mode: 0644]
queue-6.18/mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch [new file with mode: 0644]
queue-6.18/sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch [new file with mode: 0644]
queue-6.18/sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch [new file with mode: 0644]
queue-6.18/sched-proxy-yield-the-donor-task.patch [new file with mode: 0644]
queue-6.18/sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch [new file with mode: 0644]
queue-6.18/series [new file with mode: 0644]
queue-6.18/x86-microcode-amd-select-which-microcode-patch-to-load.patch [new file with mode: 0644]

diff --git a/queue-6.18/drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch b/queue-6.18/drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch
new file mode 100644 (file)
index 0000000..92f6baf
--- /dev/null
@@ -0,0 +1,137 @@
+From stable+bounces-204371-greg=kroah.com@vger.kernel.org Wed Dec 31 17:19:08 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Dec 2025 11:18:56 -0500
+Subject: drm/displayid: add quirk to ignore DisplayID checksum errors
+To: stable@vger.kernel.org
+Cc: "Jani Nikula" <jani.nikula@intel.com>, "Tiago Martins Araújo" <tiago.martins.araujo@gmail.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251231161856.3237284-3-sashal@kernel.org>
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+[ Upstream commit 83cbb4d33dc22b0ca1a4e85c6e892c9b729e28d4 ]
+
+Add a mechanism for DisplayID specific quirks, and add the first quirk
+to ignore DisplayID section checksum errors.
+
+It would be quite inconvenient to pass existing EDID quirks from
+drm_edid.c for DisplayID parsing. Not all places doing DisplayID
+iteration have the quirks readily available, and would have to pass it
+in all places. Simply add a separate array of DisplayID specific EDID
+quirks. We do end up checking it every time we iterate DisplayID blocks,
+but hopefully the number of quirks remains small.
+
+There are a few laptop models with DisplayID checksum failures, leading
+to the higher refresh rates that are only present in the DisplayID blocks
+being ignored. Add a quirk for the panel used in these machines.
+
+Reported-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Closes: https://lore.kernel.org/r/CACRbrPGvLP5LANXuFi6z0S7XMbAG4X5y2YOLBDxfOVtfGGqiKQ@mail.gmail.com
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14703
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Tested-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/c04d81ae648c5f21b3f5b7953f924718051f2798.1761681968.git.jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_displayid.c          |   41 +++++++++++++++++++++++++++----
+ drivers/gpu/drm/drm_displayid_internal.h |    2 +
+ 2 files changed, 39 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/drm_displayid.c
++++ b/drivers/gpu/drm/drm_displayid.c
+@@ -9,6 +9,34 @@
+ #include "drm_crtc_internal.h"
+ #include "drm_displayid_internal.h"
++enum {
++      QUIRK_IGNORE_CHECKSUM,
++};
++
++struct displayid_quirk {
++      const struct drm_edid_ident ident;
++      u8 quirks;
++};
++
++static const struct displayid_quirk quirks[] = {
++      {
++              .ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
++              .quirks = BIT(QUIRK_IGNORE_CHECKSUM),
++      },
++};
++
++static u8 get_quirks(const struct drm_edid *drm_edid)
++{
++      int i;
++
++      for (i = 0; i < ARRAY_SIZE(quirks); i++) {
++              if (drm_edid_match(drm_edid, &quirks[i].ident))
++                      return quirks[i].quirks;
++      }
++
++      return 0;
++}
++
+ static const struct displayid_header *
+ displayid_get_header(const u8 *displayid, int length, int index)
+ {
+@@ -23,7 +51,7 @@ displayid_get_header(const u8 *displayid
+ }
+ static const struct displayid_header *
+-validate_displayid(const u8 *displayid, int length, int idx)
++validate_displayid(const u8 *displayid, int length, int idx, bool ignore_checksum)
+ {
+       int i, dispid_length;
+       u8 csum = 0;
+@@ -41,8 +69,11 @@ validate_displayid(const u8 *displayid,
+       for (i = 0; i < dispid_length; i++)
+               csum += displayid[idx + i];
+       if (csum) {
+-              DRM_NOTE("DisplayID checksum invalid, remainder is %d\n", csum);
+-              return ERR_PTR(-EINVAL);
++              DRM_NOTE("DisplayID checksum invalid, remainder is %d%s\n", csum,
++                       ignore_checksum ? " (ignoring)" : "");
++
++              if (!ignore_checksum)
++                      return ERR_PTR(-EINVAL);
+       }
+       return base;
+@@ -52,6 +83,7 @@ static const u8 *find_next_displayid_ext
+ {
+       const struct displayid_header *base;
+       const u8 *displayid;
++      bool ignore_checksum = iter->quirks & BIT(QUIRK_IGNORE_CHECKSUM);
+       displayid = drm_edid_find_extension(iter->drm_edid, DISPLAYID_EXT, &iter->ext_index);
+       if (!displayid)
+@@ -61,7 +93,7 @@ static const u8 *find_next_displayid_ext
+       iter->length = EDID_LENGTH - 1;
+       iter->idx = 1;
+-      base = validate_displayid(displayid, iter->length, iter->idx);
++      base = validate_displayid(displayid, iter->length, iter->idx, ignore_checksum);
+       if (IS_ERR(base))
+               return NULL;
+@@ -76,6 +108,7 @@ void displayid_iter_edid_begin(const str
+       memset(iter, 0, sizeof(*iter));
+       iter->drm_edid = drm_edid;
++      iter->quirks = get_quirks(drm_edid);
+ }
+ static const struct displayid_block *
+--- a/drivers/gpu/drm/drm_displayid_internal.h
++++ b/drivers/gpu/drm/drm_displayid_internal.h
+@@ -167,6 +167,8 @@ struct displayid_iter {
+       u8 version;
+       u8 primary_use;
++
++      u8 quirks;
+ };
+ void displayid_iter_edid_begin(const struct drm_edid *drm_edid,
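
As an illustration of the quirk mechanism described in the changelog above (not part of the queued patch): covering another affected panel would only mean appending an entry to the table introduced by this patch. The vendor code, product ID and panel name in the second entry below are hypothetical placeholders, not a real quirk.

static const struct displayid_quirk quirks[] = {
	{
		.ident = DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5"),
		.quirks = BIT(QUIRK_IGNORE_CHECKSUM),
	},
	{
		/* hypothetical example entry, not a real quirk */
		.ident = DRM_EDID_IDENT_INIT('A', 'B', 'C', 0x1234, "EXAMPLE-PANEL"),
		.quirks = BIT(QUIRK_IGNORE_CHECKSUM),
	},
};

Note that get_quirks() does a linear scan over this table on every DisplayID iteration, which is why the changelog hopes the number of quirks stays small.
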
diff --git a/queue-6.18/drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch b/queue-6.18/drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch
new file mode 100644 (file)
index 0000000..ba900f9
--- /dev/null
@@ -0,0 +1,42 @@
+From stable+bounces-204370-greg=kroah.com@vger.kernel.org Wed Dec 31 17:19:04 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Dec 2025 11:18:55 -0500
+Subject: drm/edid: add DRM_EDID_IDENT_INIT() to initialize struct drm_edid_ident
+To: stable@vger.kernel.org
+Cc: "Jani Nikula" <jani.nikula@intel.com>, "Tiago Martins Araújo" <tiago.martins.araujo@gmail.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251231161856.3237284-2-sashal@kernel.org>
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+[ Upstream commit 8b61583f993589a64c061aa91b44f5bd350d90a5 ]
+
+Add a convenience helper for initializing struct drm_edid_ident.
+
+Cc: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Tested-by: Tiago Martins Araújo <tiago.martins.araujo@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/710b2ac6a211606ec1f90afa57b79e8c7375a27e.1761681968.git.jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Stable-dep-of: 83cbb4d33dc2 ("drm/displayid: add quirk to ignore DisplayID checksum errors")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/drm/drm_edid.h |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/include/drm/drm_edid.h
++++ b/include/drm/drm_edid.h
+@@ -340,6 +340,12 @@ struct drm_edid_ident {
+       const char *name;
+ };
++#define DRM_EDID_IDENT_INIT(_vend_chr_0, _vend_chr_1, _vend_chr_2, _product_id, _name) \
++{ \
++      .panel_id = drm_edid_encode_panel_id(_vend_chr_0, _vend_chr_1, _vend_chr_2, _product_id), \
++      .name = _name, \
++}
++
+ #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
+ /* Short Audio Descriptor */
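
A rough usage sketch (not from the patch itself): with the helper above, a struct drm_edid_ident can be written as a one-line initializer instead of open-coding both fields. The panel values reuse the quirk entry added by the DisplayID patch in this series.

/* Open-coded initialization, as callers had to write before: */
static const struct drm_edid_ident ident_open_coded = {
	.panel_id = drm_edid_encode_panel_id('C', 'S', 'O', 5142),
	.name = "MNE007ZA1-5",
};

/* Equivalent, using the new helper: */
static const struct drm_edid_ident ident_with_helper =
	DRM_EDID_IDENT_INIT('C', 'S', 'O', 5142, "MNE007ZA1-5");
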
diff --git a/queue-6.18/drm-nova-depend-on-config_64bit.patch b/queue-6.18/drm-nova-depend-on-config_64bit.patch
new file mode 100644 (file)
index 0000000..41c6245
--- /dev/null
@@ -0,0 +1,31 @@
+From ba1b40ed0e34bab597fd90d4c4e9f7397f878c8f Mon Sep 17 00:00:00 2001
+From: Danilo Krummrich <dakr@kernel.org>
+Date: Tue, 28 Oct 2025 12:00:52 +0100
+Subject: drm: nova: depend on CONFIG_64BIT
+
+From: Danilo Krummrich <dakr@kernel.org>
+
+commit ba1b40ed0e34bab597fd90d4c4e9f7397f878c8f upstream.
+
+nova-core already depends on CONFIG_64BIT, hence also depend on
+CONFIG_64BIT for nova-drm.
+
+Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Link: https://patch.msgid.link/20251028110058.340320-1-dakr@kernel.org
+Signed-off-by: Danilo Krummrich <dakr@kernel.org>
+Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nova/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/nova/Kconfig
++++ b/drivers/gpu/drm/nova/Kconfig
+@@ -1,5 +1,6 @@
+ config DRM_NOVA
+       tristate "Nova DRM driver"
++      depends on 64BIT
+       depends on DRM=y
+       depends on PCI
+       depends on RUST
diff --git a/queue-6.18/kvm-s390-fix-gmap_helper_zap_one_page-again.patch b/queue-6.18/kvm-s390-fix-gmap_helper_zap_one_page-again.patch
new file mode 100644 (file)
index 0000000..fa74d87
--- /dev/null
@@ -0,0 +1,57 @@
+From stable+bounces-204314-greg=kroah.com@vger.kernel.org Wed Dec 31 04:16:32 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Dec 2025 22:16:26 -0500
+Subject: KVM: s390: Fix gmap_helper_zap_one_page() again
+To: stable@vger.kernel.org
+Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>, Marc Hartmayer <mhartmay@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Heiko Carstens <hca@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251231031626.2684565-1-sashal@kernel.org>
+
+From: Claudio Imbrenda <imbrenda@linux.ibm.com>
+
+[ Upstream commit 2f393c228cc519ddf19b8c6c05bf15723241aa96 ]
+
+A few checks were missing in gmap_helper_zap_one_page(), which can lead
+to memory corruption in the guest under specific circumstances.
+
+Add the missing checks.
+
+Fixes: 5deafa27d9ae ("KVM: s390: Fix to clear PTE when discarding a swapped page")
+Cc: stable@vger.kernel.org
+Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Tested-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ adapted ptep_zap_softleaf_entry() and softleaf_from_pte() calls to ptep_zap_swap_entry() and pte_to_swp_entry() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/gmap_helpers.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/mm/gmap_helpers.c
++++ b/arch/s390/mm/gmap_helpers.c
+@@ -47,6 +47,7 @@ static void ptep_zap_swap_entry(struct m
+ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+ {
+       struct vm_area_struct *vma;
++      unsigned long pgstev;
+       spinlock_t *ptl;
+       pgste_t pgste;
+       pte_t *ptep;
+@@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_
+       if (pte_swap(*ptep)) {
+               preempt_disable();
+               pgste = pgste_get_lock(ptep);
++              pgstev = pgste_val(pgste);
+-              ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+-              pte_clear(mm, vmaddr, ptep);
++              if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
++                  (pgstev & _PGSTE_GPS_ZERO)) {
++                      ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
++                      pte_clear(mm, vmaddr, ptep);
++              }
+               pgste_set_unlock(ptep, pgste);
+               preempt_enable();
diff --git a/queue-6.18/mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch b/queue-6.18/mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch
new file mode 100644 (file)
index 0000000..4eb93f1
--- /dev/null
@@ -0,0 +1,186 @@
+From stable+bounces-204176-greg=kroah.com@vger.kernel.org Tue Dec 30 03:48:38 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 21:48:31 -0500
+Subject: mm/huge_memory: merge uniform_split_supported() and non_uniform_split_supported()
+To: stable@vger.kernel.org
+Cc: Wei Yang <richard.weiyang@gmail.com>, Zi Yan <ziy@nvidia.com>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, Dev Jain <dev.jain@arm.com>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Andrew Morton <akpm@linux-foundation.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251230024831.1972219-1-sashal@kernel.org>
+
+From: Wei Yang <richard.weiyang@gmail.com>
+
+[ Upstream commit 8a0e4bdddd1c998b894d879a1d22f1e745606215 ]
+
+uniform_split_supported() and non_uniform_split_supported() share
+significantly similar logic.
+
+The only functional difference is that uniform_split_supported() includes
+an additional check on the requested @new_order.
+
+The reason for this check comes from the following two aspects:
+
+  * some file system or swap cache just supports order-0 folio
+  * the behavioral difference between uniform/non-uniform split
+
+The behavioral difference between uniform split and non-uniform:
+
+  * uniform split splits folio directly to @new_order
+  * non-uniform split creates after-split folios with orders from
+    folio_order(folio) - 1 to new_order.
+
+This means for non-uniform split or non-0 new_order split we should check the
+file system and swap cache respectively.
+
+This commit unifies the logic and merges the two functions into a single
+combined helper, removing redundant code and simplifying the split
+support checking mechanism.
+
+Link: https://lkml.kernel.org/r/20251106034155.21398-3-richard.weiyang@gmail.com
+Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
+Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: "David Hildenbrand (Red Hat)" <david@kernel.org>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ split_type => uniform_split and replaced SPLIT_TYPE_NON_UNIFORM checks ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/huge_mm.h |    8 ++---
+ mm/huge_memory.c        |   71 ++++++++++++++++++++----------------------------
+ 2 files changed, 33 insertions(+), 46 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -369,10 +369,8 @@ int split_huge_page_to_list_to_order(str
+               unsigned int new_order);
+ int min_order_for_split(struct folio *folio);
+ int split_folio_to_list(struct folio *folio, struct list_head *list);
+-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+-              bool warns);
+-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+-              bool warns);
++bool folio_split_supported(struct folio *folio, unsigned int new_order,
++              bool uniform_split, bool warns);
+ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
+               struct list_head *list);
+ /*
+@@ -392,7 +390,7 @@ int folio_split(struct folio *folio, uns
+ static inline int try_folio_split_to_order(struct folio *folio,
+               struct page *page, unsigned int new_order)
+ {
+-      if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
++      if (!folio_split_supported(folio, new_order, false, /* warns= */ false))
+               return split_huge_page_to_list_to_order(&folio->page, NULL,
+                               new_order);
+       return folio_split(folio, new_order, page, NULL);
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -3515,8 +3515,8 @@ static int __split_unmapped_folio(struct
+       return ret;
+ }
+-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
+-              bool warns)
++bool folio_split_supported(struct folio *folio, unsigned int new_order,
++              bool uniform_split, bool warns)
+ {
+       if (folio_test_anon(folio)) {
+               /* order-1 is not supported for anonymous THP. */
+@@ -3524,48 +3524,41 @@ bool non_uniform_split_supported(struct
+                               "Cannot split to order-1 folio");
+               if (new_order == 1)
+                       return false;
+-      } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+-          !mapping_large_folio_support(folio->mapping)) {
+-              /*
+-               * No split if the file system does not support large folio.
+-               * Note that we might still have THPs in such mappings due to
+-               * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+-               * does not actually support large folios properly.
+-               */
+-              VM_WARN_ONCE(warns,
+-                      "Cannot split file folio to non-0 order");
+-              return false;
+-      }
+-
+-      /* Only swapping a whole PMD-mapped folio is supported */
+-      if (folio_test_swapcache(folio)) {
+-              VM_WARN_ONCE(warns,
+-                      "Cannot split swapcache folio to non-0 order");
+-              return false;
+-      }
+-
+-      return true;
+-}
+-
+-/* See comments in non_uniform_split_supported() */
+-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
+-              bool warns)
+-{
+-      if (folio_test_anon(folio)) {
+-              VM_WARN_ONCE(warns && new_order == 1,
+-                              "Cannot split to order-1 folio");
+-              if (new_order == 1)
+-                      return false;
+-      } else  if (new_order) {
++      } else if (!uniform_split || new_order) {
+               if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+                   !mapping_large_folio_support(folio->mapping)) {
++                      /*
++                       * We can always split a folio down to a single page
++                       * (new_order == 0) uniformly.
++                       *
++                       * For any other scenario
++                       *   a) uniform split targeting a large folio
++                       *      (new_order > 0)
++                       *   b) any non-uniform split
++                       * we must confirm that the file system supports large
++                       * folios.
++                       *
++                       * Note that we might still have THPs in such
++                       * mappings, which is created from khugepaged when
++                       * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that
++                       * case, the mapping does not actually support large
++                       * folios properly.
++                       */
+                       VM_WARN_ONCE(warns,
+                               "Cannot split file folio to non-0 order");
+                       return false;
+               }
+       }
+-      if (new_order && folio_test_swapcache(folio)) {
++      /*
++       * swapcache folio could only be split to order 0
++       *
++       * non-uniform split creates after-split folios with orders from
++       * folio_order(folio) - 1 to new_order, making it not suitable for any
++       * swapcache folio split. Only uniform split to order-0 can be used
++       * here.
++       */
++      if ((!uniform_split || new_order) && folio_test_swapcache(folio)) {
+               VM_WARN_ONCE(warns,
+                       "Cannot split swapcache folio to non-0 order");
+               return false;
+@@ -3632,11 +3625,7 @@ static int __folio_split(struct folio *f
+       if (new_order >= folio_order(folio))
+               return -EINVAL;
+-      if (uniform_split && !uniform_split_supported(folio, new_order, true))
+-              return -EINVAL;
+-
+-      if (!uniform_split &&
+-          !non_uniform_split_supported(folio, new_order, true))
++      if (!folio_split_supported(folio, new_order, uniform_split, /* warn = */ true))
+               return -EINVAL;
+       is_hzp = is_huge_zero_folio(folio);
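
For reference, a sketch of how the merged helper reads from the caller side, summarizing the rules from the changelog and the code above (not an excerpt from the patch):

/*
 * folio_split_supported(folio, new_order, uniform_split, warns):
 *
 *  - anonymous folio with new_order == 1: rejected.
 *  - uniform split to order-0: allowed without file system or swapcache checks.
 *  - non-uniform split, or uniform split to new_order > 0:
 *      * a file-backed folio needs mapping_large_folio_support()
 *        (the CONFIG_READ_ONLY_THP_FOR_FS special case);
 *      * a folio in the swapcache is rejected.
 */
bool can_split = folio_split_supported(folio, new_order,
				       /* uniform_split = */ true,
				       /* warns = */ false);
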
diff --git a/queue-6.18/sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch b/queue-6.18/sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch
new file mode 100644 (file)
index 0000000..5cbed64
--- /dev/null
@@ -0,0 +1,218 @@
+From stable+bounces-204125-greg=kroah.com@vger.kernel.org Mon Dec 29 20:35:44 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:35:37 -0500
+Subject: sched/core: Add comment explaining force-idle vruntime snapshots
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193539.1640748-1-sashal@kernel.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 9359d9785d85bb53f1ff1738a59aeeec4b878906 ]
+
+I always end up having to re-read these emails every time I look at
+this code. And a future patch is going to change this story a little.
+This means it is past time to stick them in a comment so it can be
+modified and stay current.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20200506143506.GH5298@hirez.programming.kicks-ass.net
+Link: https://lkml.kernel.org/r/20200515103844.GG2978@hirez.programming.kicks-ass.net
+Link: https://patch.msgid.link/20251106111603.GB4068168@noisy.programming.kicks-ass.net
+Stable-dep-of: 79f3f9bedd14 ("sched/eevdf: Fix min_vruntime vs avg_vruntime")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c |  181 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 181 insertions(+)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -13014,6 +13014,187 @@ static inline void task_tick_core(struct
+ }
+ /*
++ * Consider any infeasible weight scenario. Take for instance two tasks,
++ * each bound to their respective sibling, one with weight 1 and one with
++ * weight 2. Then the lower weight task will run ahead of the higher weight
++ * task without bound.
++ *
++ * This utterly destroys the concept of a shared time base.
++ *
++ * Remember; all this is about a proportionally fair scheduling, where each
++ * tasks receives:
++ *
++ *              w_i
++ *   dt_i = ---------- dt                                     (1)
++ *          \Sum_j w_j
++ *
++ * which we do by tracking a virtual time, s_i:
++ *
++ *          1
++ *   s_i = --- d[t]_i                                         (2)
++ *         w_i
++ *
++ * Where d[t] is a delta of discrete time, while dt is an infinitesimal.
++ * The immediate corollary is that the ideal schedule S, where (2) to use
++ * an infinitesimal delta, is:
++ *
++ *           1
++ *   S = ---------- dt                                        (3)
++ *       \Sum_i w_i
++ *
++ * From which we can define the lag, or deviation from the ideal, as:
++ *
++ *   lag(i) = S - s_i                                         (4)
++ *
++ * And since the one and only purpose is to approximate S, we get that:
++ *
++ *   \Sum_i w_i lag(i) := 0                                   (5)
++ *
++ * If this were not so, we no longer converge to S, and we can no longer
++ * claim our scheduler has any of the properties we derive from S. This is
++ * exactly what you did above, you broke it!
++ *
++ *
++ * Let's continue for a while though; to see if there is anything useful to
++ * be learned. We can combine (1)-(3) or (4)-(5) and express S in s_i:
++ *
++ *       \Sum_i w_i s_i
++ *   S = --------------                                       (6)
++ *         \Sum_i w_i
++ *
++ * Which gives us a way to compute S, given our s_i. Now, if you've read
++ * our code, you know that we do not in fact do this, the reason for this
++ * is two-fold. Firstly, computing S in that way requires a 64bit division
++ * for every time we'd use it (see 12), and secondly, this only describes
++ * the steady-state, it doesn't handle dynamics.
++ *
++ * Anyway, in (6):  s_i -> x + (s_i - x), to get:
++ *
++ *           \Sum_i w_i (s_i - x)
++ *   S - x = --------------------                             (7)
++ *              \Sum_i w_i
++ *
++ * Which shows that S and s_i transform alike (which makes perfect sense
++ * given that S is basically the (weighted) average of s_i).
++ *
++ * Then:
++ *
++ *   x -> s_min := min{s_i}                                   (8)
++ *
++ * to obtain:
++ *
++ *               \Sum_i w_i (s_i - s_min)
++ *   S = s_min + ------------------------                     (9)
++ *                     \Sum_i w_i
++ *
++ * Which already looks familiar, and is the basis for our current
++ * approximation:
++ *
++ *   S ~= s_min                                              (10)
++ *
++ * Now, obviously, (10) is absolute crap :-), but it sorta works.
++ *
++ * So the thing to remember is that the above is strictly UP. It is
++ * possible to generalize to multiple runqueues -- however it gets really
++ * yuck when you have to add affinity support, as illustrated by our very
++ * first counter-example.
++ *
++ * Luckily I think we can avoid needing a full multi-queue variant for
++ * core-scheduling (or load-balancing). The crucial observation is that we
++ * only actually need this comparison in the presence of forced-idle; only
++ * then do we need to tell if the stalled rq has higher priority over the
++ * other.
++ *
++ * [XXX assumes SMT2; better consider the more general case, I suspect
++ * it'll work out because our comparison is always between 2 rqs and the
++ * answer is only interesting if one of them is forced-idle]
++ *
++ * And (under assumption of SMT2) when there is forced-idle, there is only
++ * a single queue, so everything works like normal.
++ *
++ * Let, for our runqueue 'k':
++ *
++ *   T_k = \Sum_i w_i s_i
++ *   W_k = \Sum_i w_i      ; for all i of k                  (11)
++ *
++ * Then we can write (6) like:
++ *
++ *         T_k
++ *   S_k = ---                                               (12)
++ *         W_k
++ *
++ * From which immediately follows that:
++ *
++ *           T_k + T_l
++ *   S_k+l = ---------                                       (13)
++ *           W_k + W_l
++ *
++ * On which we can define a combined lag:
++ *
++ *   lag_k+l(i) := S_k+l - s_i                               (14)
++ *
++ * And that gives us the tools to compare tasks across a combined runqueue.
++ *
++ *
++ * Combined this gives the following:
++ *
++ *  a) when a runqueue enters force-idle, sync it against its sibling rq(s)
++ *     using (7); this only requires storing single 'time'-stamps.
++ *
++ *  b) when comparing tasks between 2 runqueues of which one is forced-idle,
++ *     compare the combined lag, per (14).
++ *
++ * Now, of course cgroups (I so hate them) make this more interesting in
++ * that a) seems to suggest we need to iterate all cgroup on a CPU at such
++ * boundaries, but I think we can avoid that. The force-idle is for the
++ * whole CPU, all its rqs. So we can mark it in the root and lazily
++ * propagate downward on demand.
++ */
++
++/*
++ * So this sync is basically a relative reset of S to 0.
++ *
++ * So with 2 queues, when one goes idle, we drop them both to 0 and one
++ * then increases due to not being idle, and the idle one builds up lag to
++ * get re-elected. So far so simple, right?
++ *
++ * When there's 3, we can have the situation where 2 run and one is idle,
++ * we sync to 0 and let the idle one build up lag to get re-election. Now
++ * suppose another one also drops idle. At this point dropping all to 0
++ * again would destroy the built-up lag from the queue that was already
++ * idle, not good.
++ *
++ * So instead of syncing everything, we can:
++ *
++ *   less := !((s64)(s_a - s_b) <= 0)
++ *
++ *   (v_a - S_a) - (v_b - S_b) == v_a - v_b - S_a + S_b
++ *                             == v_a - (v_b - S_a + S_b)
++ *
++ * IOW, we can recast the (lag) comparison to a one-sided difference.
++ * So if then, instead of syncing the whole queue, sync the idle queue
++ * against the active queue with S_a + S_b at the point where we sync.
++ *
++ * (XXX consider the implication of living in a cyclic group: N / 2^n N)
++ *
++ * This gives us means of syncing single queues against the active queue,
++ * and for already idle queues to preserve their build-up lag.
++ *
++ * Of course, then we get the situation where there's 2 active and one
++ * going idle, who do we pick to sync against? Theory would have us sync
++ * against the combined S, but as we've already demonstrated, there is no
++ * such thing in infeasible weight scenarios.
++ *
++ * One thing I've considered; and this is where that core_active rudiment
++ * came from, is having active queues sync up between themselves after
++ * every tick. This limits the observed divergence due to the work
++ * conservancy.
++ *
++ * On top of that, we can improve upon things by moving away from our
++ * horrible (10) hack and moving to (9) and employing (13) here.
++ */
++
++/*
+  * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+  */
+ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
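
A quick numeric instance of (5) and (6) above (illustration only, not part of the patch): take two runnable tasks with weights w_1 = 1, w_2 = 2 and virtual times s_1 = 12, s_2 = 6. Then:

  S = (w_1*s_1 + w_2*s_2) / (w_1 + w_2) = (1*12 + 2*6) / (1 + 2) = 8

and the weighted lags w_1*(S - s_1) = -4 and w_2*(S - s_2) = +4 sum to zero, exactly as (5) requires.
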
diff --git a/queue-6.18/sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch b/queue-6.18/sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch
new file mode 100644 (file)
index 0000000..94aad23
--- /dev/null
@@ -0,0 +1,387 @@
+From stable+bounces-204126-greg=kroah.com@vger.kernel.org Mon Dec 29 20:35:47 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:35:38 -0500
+Subject: sched/eevdf: Fix min_vruntime vs avg_vruntime
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Zicheng Qu <quzicheng@huawei.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193539.1640748-2-sashal@kernel.org>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 79f3f9bedd149ea438aaeb0fb6a083637affe205 ]
+
+Basically, from the constraint that the sum of lag is zero, you can
+infer that the 0-lag point is the weighted average of the individual
+vruntime, which is what we're trying to compute:
+
+        \Sum w_i * v_i
+  avg = --------------
+           \Sum w_i
+
+Now, since vruntime takes the whole u64 (worse, it wraps), this
+multiplication term in the numerator is not something we can compute;
+instead we do the min_vruntime (v0 henceforth) thing like:
+
+  v_i = (v_i - v0) + v0
+
+This does two things:
+ - it keeps the key: (v_i - v0) 'small';
+ - it creates a relative 0-point in the modular space.
+
+If you do that substitution and work it all out, you end up with:
+
+        \Sum w_i * (v_i - v0)
+  avg = --------------------- + v0
+              \Sum w_i
+
+Since you cannot very well track a ratio like that (and not suffer
+terrible numerical problems) we simply track the numerator and
+denominator individually and only perform the division when strictly
+needed.
+
+Notably, the numerator lives in cfs_rq->avg_vruntime and the denominator
+lives in cfs_rq->avg_load.
+
+The one extra 'funny' is that these numbers track the entities in the
+tree, and current is typically outside of the tree, so avg_vruntime()
+adds current when needed before doing the division.
+
+(vruntime_eligible() elides the division by cross-wise multiplication)
+
+Anyway, as mentioned above, we currently use the CFS era min_vruntime
+for this purpose. However, this thing can only move forward, while the
+above avg can in fact move backward (when a non-eligible task leaves,
+the average becomes smaller), this can cause trouble when through
+happenstance (or construction) these values drift far enough apart to
+wreck the game.
+
+Replace cfs_rq::min_vruntime with cfs_rq::zero_vruntime which is kept
+near/at avg_vruntime, following its motion.
+
+The down-side is that this requires computing the avg more often.
+
+Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
+Reported-by: Zicheng Qu <quzicheng@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://patch.msgid.link/20251106111741.GC4068168@noisy.programming.kicks-ass.net
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/debug.c |    8 +--
+ kernel/sched/fair.c  |  114 +++++++++++----------------------------------------
+ kernel/sched/sched.h |    4 -
+ 3 files changed, 31 insertions(+), 95 deletions(-)
+
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -796,7 +796,7 @@ static void print_rq(struct seq_file *m,
+ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+ {
+-      s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
++      s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread;
+       struct sched_entity *last, *first, *root;
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+@@ -819,15 +819,15 @@ void print_cfs_rq(struct seq_file *m, in
+       last = __pick_last_entity(cfs_rq);
+       if (last)
+               right_vruntime = last->vruntime;
+-      min_vruntime = cfs_rq->min_vruntime;
++      zero_vruntime = cfs_rq->zero_vruntime;
+       raw_spin_rq_unlock_irqrestore(rq, flags);
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_deadline",
+                       SPLIT_NS(left_deadline));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
+                       SPLIT_NS(left_vruntime));
+-      SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
+-                      SPLIT_NS(min_vruntime));
++      SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "zero_vruntime",
++                      SPLIT_NS(zero_vruntime));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+                       SPLIT_NS(avg_vruntime(cfs_rq)));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -554,7 +554,7 @@ static inline bool entity_before(const s
+ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+-      return (s64)(se->vruntime - cfs_rq->min_vruntime);
++      return (s64)(se->vruntime - cfs_rq->zero_vruntime);
+ }
+ #define __node_2_se(node) \
+@@ -606,13 +606,13 @@ static inline s64 entity_key(struct cfs_
+  *
+  * Which we track using:
+  *
+- *                    v0 := cfs_rq->min_vruntime
++ *                    v0 := cfs_rq->zero_vruntime
+  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+  *              \Sum w_i := cfs_rq->avg_load
+  *
+- * Since min_vruntime is a monotonic increasing variable that closely tracks
+- * the per-task service, these deltas: (v_i - v), will be in the order of the
+- * maximal (virtual) lag induced in the system due to quantisation.
++ * Since zero_vruntime closely tracks the per-task service, these
++ * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
++ * induced in the system due to quantisation.
+  *
+  * Also, we use scale_load_down() to reduce the size.
+  *
+@@ -671,7 +671,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
+               avg = div_s64(avg, load);
+       }
+-      return cfs_rq->min_vruntime + avg;
++      return cfs_rq->zero_vruntime + avg;
+ }
+ /*
+@@ -732,7 +732,7 @@ static int vruntime_eligible(struct cfs_
+               load += weight;
+       }
+-      return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
++      return avg >= (s64)(vruntime - cfs_rq->zero_vruntime) * load;
+ }
+ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
+@@ -740,42 +740,14 @@ int entity_eligible(struct cfs_rq *cfs_r
+       return vruntime_eligible(cfs_rq, se->vruntime);
+ }
+-static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
++static void update_zero_vruntime(struct cfs_rq *cfs_rq)
+ {
+-      u64 min_vruntime = cfs_rq->min_vruntime;
+-      /*
+-       * open coded max_vruntime() to allow updating avg_vruntime
+-       */
+-      s64 delta = (s64)(vruntime - min_vruntime);
+-      if (delta > 0) {
+-              avg_vruntime_update(cfs_rq, delta);
+-              min_vruntime = vruntime;
+-      }
+-      return min_vruntime;
+-}
+-
+-static void update_min_vruntime(struct cfs_rq *cfs_rq)
+-{
+-      struct sched_entity *se = __pick_root_entity(cfs_rq);
+-      struct sched_entity *curr = cfs_rq->curr;
+-      u64 vruntime = cfs_rq->min_vruntime;
++      u64 vruntime = avg_vruntime(cfs_rq);
++      s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
+-      if (curr) {
+-              if (curr->on_rq)
+-                      vruntime = curr->vruntime;
+-              else
+-                      curr = NULL;
+-      }
+-
+-      if (se) {
+-              if (!curr)
+-                      vruntime = se->min_vruntime;
+-              else
+-                      vruntime = min_vruntime(vruntime, se->min_vruntime);
+-      }
++      avg_vruntime_update(cfs_rq, delta);
+-      /* ensure we never gain time by being placed backwards. */
+-      cfs_rq->min_vruntime = __update_min_vruntime(cfs_rq, vruntime);
++      cfs_rq->zero_vruntime = vruntime;
+ }
+ static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
+@@ -848,6 +820,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntim
+ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+       avg_vruntime_add(cfs_rq, se);
++      update_zero_vruntime(cfs_rq);
+       se->min_vruntime = se->vruntime;
+       se->min_slice = se->slice;
+       rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+@@ -859,6 +832,7 @@ static void __dequeue_entity(struct cfs_
+       rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+                                 &min_vruntime_cb);
+       avg_vruntime_sub(cfs_rq, se);
++      update_zero_vruntime(cfs_rq);
+ }
+ struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
+@@ -1226,7 +1200,6 @@ static void update_curr(struct cfs_rq *c
+       curr->vruntime += calc_delta_fair(delta_exec, curr);
+       resched = update_deadline(cfs_rq, curr);
+-      update_min_vruntime(cfs_rq);
+       if (entity_is_task(curr)) {
+               /*
+@@ -3808,15 +3781,6 @@ static void reweight_entity(struct cfs_r
+               if (!curr)
+                       __enqueue_entity(cfs_rq, se);
+               cfs_rq->nr_queued++;
+-
+-              /*
+-               * The entity's vruntime has been adjusted, so let's check
+-               * whether the rq-wide min_vruntime needs updated too. Since
+-               * the calculations above require stable min_vruntime rather
+-               * than up-to-date one, we do the update at the end of the
+-               * reweight process.
+-               */
+-              update_min_vruntime(cfs_rq);
+       }
+ }
+@@ -5432,15 +5396,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
+       update_cfs_group(se);
+-      /*
+-       * Now advance min_vruntime if @se was the entity holding it back,
+-       * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+-       * put back on, and if we advance min_vruntime, we'll be placed back
+-       * further than we started -- i.e. we'll be penalized.
+-       */
+-      if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
+-              update_min_vruntime(cfs_rq);
+-
+       if (flags & DEQUEUE_DELAYED)
+               finish_delayed_dequeue_entity(se);
+@@ -9028,7 +8983,6 @@ static void yield_task_fair(struct rq *r
+       if (entity_eligible(cfs_rq, se)) {
+               se->vruntime = se->deadline;
+               se->deadline += calc_delta_fair(se->slice, se);
+-              update_min_vruntime(cfs_rq);
+       }
+ }
+@@ -13077,23 +13031,6 @@ static inline void task_tick_core(struct
+  * Which shows that S and s_i transform alike (which makes perfect sense
+  * given that S is basically the (weighted) average of s_i).
+  *
+- * Then:
+- *
+- *   x -> s_min := min{s_i}                                   (8)
+- *
+- * to obtain:
+- *
+- *               \Sum_i w_i (s_i - s_min)
+- *   S = s_min + ------------------------                     (9)
+- *                     \Sum_i w_i
+- *
+- * Which already looks familiar, and is the basis for our current
+- * approximation:
+- *
+- *   S ~= s_min                                              (10)
+- *
+- * Now, obviously, (10) is absolute crap :-), but it sorta works.
+- *
+  * So the thing to remember is that the above is strictly UP. It is
+  * possible to generalize to multiple runqueues -- however it gets really
+  * yuck when you have to add affinity support, as illustrated by our very
+@@ -13115,23 +13052,23 @@ static inline void task_tick_core(struct
+  * Let, for our runqueue 'k':
+  *
+  *   T_k = \Sum_i w_i s_i
+- *   W_k = \Sum_i w_i      ; for all i of k                  (11)
++ *   W_k = \Sum_i w_i      ; for all i of k                  (8)
+  *
+  * Then we can write (6) like:
+  *
+  *         T_k
+- *   S_k = ---                                               (12)
++ *   S_k = ---                                               (9)
+  *         W_k
+  *
+  * From which immediately follows that:
+  *
+  *           T_k + T_l
+- *   S_k+l = ---------                                       (13)
++ *   S_k+l = ---------                                       (10)
+  *           W_k + W_l
+  *
+  * On which we can define a combined lag:
+  *
+- *   lag_k+l(i) := S_k+l - s_i                               (14)
++ *   lag_k+l(i) := S_k+l - s_i                               (11)
+  *
+  * And that gives us the tools to compare tasks across a combined runqueue.
+  *
+@@ -13142,7 +13079,7 @@ static inline void task_tick_core(struct
+  *     using (7); this only requires storing single 'time'-stamps.
+  *
+  *  b) when comparing tasks between 2 runqueues of which one is forced-idle,
+- *     compare the combined lag, per (14).
++ *     compare the combined lag, per (11).
+  *
+  * Now, of course cgroups (I so hate them) make this more interesting in
+  * that a) seems to suggest we need to iterate all cgroup on a CPU at such
+@@ -13190,12 +13127,11 @@ static inline void task_tick_core(struct
+  * every tick. This limits the observed divergence due to the work
+  * conservancy.
+  *
+- * On top of that, we can improve upon things by moving away from our
+- * horrible (10) hack and moving to (9) and employing (13) here.
++ * On top of that, we can improve upon things by employing (10) here.
+  */
+ /*
+- * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
++ * se_fi_update - Update the cfs_rq->zero_vruntime_fi in a CFS hierarchy if needed.
+  */
+ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
+                        bool forceidle)
+@@ -13209,7 +13145,7 @@ static void se_fi_update(const struct sc
+                       cfs_rq->forceidle_seq = fi_seq;
+               }
+-              cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
++              cfs_rq->zero_vruntime_fi = cfs_rq->zero_vruntime;
+       }
+ }
+@@ -13262,11 +13198,11 @@ bool cfs_prio_less(const struct task_str
+       /*
+        * Find delta after normalizing se's vruntime with its cfs_rq's
+-       * min_vruntime_fi, which would have been updated in prior calls
++       * zero_vruntime_fi, which would have been updated in prior calls
+        * to se_fi_update().
+        */
+       delta = (s64)(sea->vruntime - seb->vruntime) +
+-              (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
++              (s64)(cfs_rqb->zero_vruntime_fi - cfs_rqa->zero_vruntime_fi);
+       return delta > 0;
+ }
+@@ -13502,7 +13438,7 @@ static void set_next_task_fair(struct rq
+ void init_cfs_rq(struct cfs_rq *cfs_rq)
+ {
+       cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+-      cfs_rq->min_vruntime = (u64)(-(1LL << 20));
++      cfs_rq->zero_vruntime = (u64)(-(1LL << 20));
+       raw_spin_lock_init(&cfs_rq->removed.lock);
+ }
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -682,10 +682,10 @@ struct cfs_rq {
+       s64                     avg_vruntime;
+       u64                     avg_load;
+-      u64                     min_vruntime;
++      u64                     zero_vruntime;
+ #ifdef CONFIG_SCHED_CORE
+       unsigned int            forceidle_seq;
+-      u64                     min_vruntime_fi;
++      u64                     zero_vruntime_fi;
+ #endif
+       struct rb_root_cached   tasks_timeline;
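
A minimal stand-alone sketch of the bookkeeping the changelog describes (assumptions: userspace toy model, no scale_load_down(), plain division instead of the kernel's div_s64()):

#include <stdint.h>

/* Toy model of the cfs_rq fields used by avg_vruntime() above. */
struct toy_cfs_rq {
	uint64_t zero_vruntime;	/* v0: the relative zero point          */
	int64_t  avg_vruntime;	/* \Sum w_i * (v_i - v0), the numerator */
	uint64_t avg_load;	/* \Sum w_i, the denominator            */
};

/* Account an entity: add its weighted, v0-relative key. */
static void toy_enqueue(struct toy_cfs_rq *rq, uint64_t v, uint64_t w)
{
	rq->avg_vruntime += (int64_t)(v - rq->zero_vruntime) * (int64_t)w;
	rq->avg_load += w;
}

/* The weighted average, i.e. the 0-lag point, recovered on demand. */
static uint64_t toy_avg_vruntime(const struct toy_cfs_rq *rq)
{
	int64_t avg = rq->avg_vruntime;

	if (rq->avg_load)
		avg /= (int64_t)rq->avg_load;

	return rq->zero_vruntime + avg;
}

Keeping the numerator relative to zero_vruntime is what keeps the per-entity keys small even though absolute vruntime wraps around the full u64 range.
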
diff --git a/queue-6.18/sched-proxy-yield-the-donor-task.patch b/queue-6.18/sched-proxy-yield-the-donor-task.patch
new file mode 100644 (file)
index 0000000..cc3328b
--- /dev/null
@@ -0,0 +1,111 @@
+From 127b90315ca07ccad2618db7ba950a63e3b32d22 Mon Sep 17 00:00:00 2001
+From: Fernand Sieber <sieberf@amazon.com>
+Date: Thu, 6 Nov 2025 12:40:10 +0200
+Subject: sched/proxy: Yield the donor task
+
+From: Fernand Sieber <sieberf@amazon.com>
+
+commit 127b90315ca07ccad2618db7ba950a63e3b32d22 upstream.
+
+When executing a task in proxy context, handle yields as if they were
+requested by the donor task. This matches the traditional PI semantics
+of yield() as well.
+
+This avoids scenarios like the proxy task yielding, pick-next selecting the
+same previously blocked donor, running the proxy task again, and so on.
+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Closes: https://lore.kernel.org/oe-lkp/202510211205.1e0f5223-lkp@intel.com
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Fernand Sieber <sieberf@amazon.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://patch.msgid.link/20251106104022.195157-1-sieberf@amazon.com
+Cc: Holger Hoffstätte <holger@applied-asynchrony.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/deadline.c |    2 +-
+ kernel/sched/ext.c      |    4 ++--
+ kernel/sched/fair.c     |    2 +-
+ kernel/sched/rt.c       |    2 +-
+ kernel/sched/syscalls.c |    5 +++--
+ 5 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -2143,7 +2143,7 @@ static void yield_task_dl(struct rq *rq)
+        * it and the bandwidth timer will wake it up and will give it
+        * new scheduling parameters (thanks to dl_yielded=1).
+        */
+-      rq->curr->dl.dl_yielded = 1;
++      rq->donor->dl.dl_yielded = 1;
+       update_rq_clock(rq);
+       update_curr_dl(rq);
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -1493,7 +1493,7 @@ static bool dequeue_task_scx(struct rq *
+ static void yield_task_scx(struct rq *rq)
+ {
+       struct scx_sched *sch = scx_root;
+-      struct task_struct *p = rq->curr;
++      struct task_struct *p = rq->donor;
+       if (SCX_HAS_OP(sch, yield))
+               SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
+@@ -1504,7 +1504,7 @@ static void yield_task_scx(struct rq *rq
+ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
+ {
+       struct scx_sched *sch = scx_root;
+-      struct task_struct *from = rq->curr;
++      struct task_struct *from = rq->donor;
+       if (SCX_HAS_OP(sch, yield))
+               return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8993,7 +8993,7 @@ static void put_prev_task_fair(struct rq
+  */
+ static void yield_task_fair(struct rq *rq)
+ {
+-      struct task_struct *curr = rq->curr;
++      struct task_struct *curr = rq->donor;
+       struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+       struct sched_entity *se = &curr->se;
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -1490,7 +1490,7 @@ static void requeue_task_rt(struct rq *r
+ static void yield_task_rt(struct rq *rq)
+ {
+-      requeue_task_rt(rq, rq->curr, 0);
++      requeue_task_rt(rq, rq->donor, 0);
+ }
+ static int find_lowest_rq(struct task_struct *task);
+--- a/kernel/sched/syscalls.c
++++ b/kernel/sched/syscalls.c
+@@ -1351,7 +1351,7 @@ static void do_sched_yield(void)
+       rq = this_rq_lock_irq(&rf);
+       schedstat_inc(rq->yld_count);
+-      current->sched_class->yield_task(rq);
++      rq->donor->sched_class->yield_task(rq);
+       preempt_disable();
+       rq_unlock_irq(rq, &rf);
+@@ -1420,12 +1420,13 @@ EXPORT_SYMBOL(yield);
+  */
+ int __sched yield_to(struct task_struct *p, bool preempt)
+ {
+-      struct task_struct *curr = current;
++      struct task_struct *curr;
+       struct rq *rq, *p_rq;
+       int yielded = 0;
+       scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+               rq = this_rq();
++              curr = rq->donor;
+ again:
+               p_rq = task_rq(p);
diff --git a/queue-6.18/sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch b/queue-6.18/sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch
new file mode 100644 (file)
index 0000000..64efa45
--- /dev/null
@@ -0,0 +1,105 @@
+From stable+bounces-204127-greg=kroah.com@vger.kernel.org Mon Dec 29 20:36:45 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Dec 2025 14:36:40 -0500
+Subject: sched_ext: Fix incorrect sched_class settings for per-cpu migration tasks
+To: stable@vger.kernel.org
+Cc: Zqiang <qiang.zhang@linux.dev>, Andrea Righi <arighi@nvidia.com>, Tejun Heo <tj@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251229193640.1641653-1-sashal@kernel.org>
+
+From: Zqiang <qiang.zhang@linux.dev>
+
+[ Upstream commit 1dd6c84f1c544e552848a8968599220bd464e338 ]
+
+When loading the ebpf scheduler, the tasks in the scx_tasks list will
+be traversed and invoke __setscheduler_class() to get new sched_class.
+however, this would also incorrectly set the per-cpu migration
+task's->sched_class to rt_sched_class, even after unload, the per-cpu
+migration task's->sched_class remains sched_rt_class.
+
+The log for this issue is as follows:
+
+./scx_rustland --stats 1
+[  199.245639][  T630] sched_ext: "rustland" does not implement cgroup cpu.weight
+[  199.269213][  T630] sched_ext: BPF scheduler "rustland" enabled
+04:25:09 [INFO] RustLand scheduler attached
+
+bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
+{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
+Attaching 1 probe...
+migration/0:24->rt_sched_class+0x0/0xe0
+migration/1:27->rt_sched_class+0x0/0xe0
+migration/2:33->rt_sched_class+0x0/0xe0
+migration/3:39->rt_sched_class+0x0/0xe0
+migration/4:45->rt_sched_class+0x0/0xe0
+migration/5:52->rt_sched_class+0x0/0xe0
+migration/6:58->rt_sched_class+0x0/0xe0
+migration/7:64->rt_sched_class+0x0/0xe0
+
+sched_ext: BPF scheduler "rustland" disabled (unregistered from user space)
+EXIT: unregistered from user space
+04:25:21 [INFO] Unregister RustLand scheduler
+
+bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
+{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
+Attaching 1 probe...
+migration/0:24->rt_sched_class+0x0/0xe0
+migration/1:27->rt_sched_class+0x0/0xe0
+migration/2:33->rt_sched_class+0x0/0xe0
+migration/3:39->rt_sched_class+0x0/0xe0
+migration/4:45->rt_sched_class+0x0/0xe0
+migration/5:52->rt_sched_class+0x0/0xe0
+migration/6:58->rt_sched_class+0x0/0xe0
+migration/7:64->rt_sched_class+0x0/0xe0
+
+This commit therefore adds a new scx_setscheduler_class() helper with
+a check for stop_sched_class to replace __setscheduler_class().
+
+Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class")
+Cc: stable@vger.kernel.org # v6.12+
+Signed-off-by: Zqiang <qiang.zhang@linux.dev>
+Reviewed-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/ext.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -210,6 +210,14 @@ static struct scx_dispatch_q *find_user_
+       return rhashtable_lookup_fast(&sch->dsq_hash, &dsq_id, dsq_hash_params);
+ }
++static const struct sched_class *scx_setscheduler_class(struct task_struct *p)
++{
++      if (p->sched_class == &stop_sched_class)
++              return &stop_sched_class;
++
++      return __setscheduler_class(p->policy, p->prio);
++}
++
+ /*
+  * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
+  * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
+@@ -3994,8 +4002,7 @@ static void scx_disable_workfn(struct kt
+       scx_task_iter_start(&sti);
+       while ((p = scx_task_iter_next_locked(&sti))) {
+               const struct sched_class *old_class = p->sched_class;
+-              const struct sched_class *new_class =
+-                      __setscheduler_class(p->policy, p->prio);
++              const struct sched_class *new_class = scx_setscheduler_class(p);
+               struct sched_enq_and_set_ctx ctx;
+               if (old_class != new_class && p->se.sched_delayed)
+@@ -4779,8 +4786,7 @@ static int scx_enable(struct sched_ext_o
+       scx_task_iter_start(&sti);
+       while ((p = scx_task_iter_next_locked(&sti))) {
+               const struct sched_class *old_class = p->sched_class;
+-              const struct sched_class *new_class =
+-                      __setscheduler_class(p->policy, p->prio);
++              const struct sched_class *new_class = scx_setscheduler_class(p);
+               struct sched_enq_and_set_ctx ctx;
+               if (!tryget_task_struct(p))
diff --git a/queue-6.18/series b/queue-6.18/series
new file mode 100644 (file)
index 0000000..60388b5
--- /dev/null
@@ -0,0 +1,10 @@
+sched-proxy-yield-the-donor-task.patch
+drm-nova-depend-on-config_64bit.patch
+x86-microcode-amd-select-which-microcode-patch-to-load.patch
+sched-core-add-comment-explaining-force-idle-vruntime-snapshots.patch
+sched-eevdf-fix-min_vruntime-vs-avg_vruntime.patch
+sched_ext-fix-incorrect-sched_class-settings-for-per-cpu-migration-tasks.patch
+mm-huge_memory-merge-uniform_split_supported-and-non_uniform_split_supported.patch
+kvm-s390-fix-gmap_helper_zap_one_page-again.patch
+drm-edid-add-drm_edid_ident_init-to-initialize-struct-drm_edid_ident.patch
+drm-displayid-add-quirk-to-ignore-displayid-checksum-errors.patch
diff --git a/queue-6.18/x86-microcode-amd-select-which-microcode-patch-to-load.patch b/queue-6.18/x86-microcode-amd-select-which-microcode-patch-to-load.patch
new file mode 100644 (file)
index 0000000..8b6331c
--- /dev/null
@@ -0,0 +1,185 @@
+From 8d171045069c804e5ffaa18be590c42c6af0cf3f Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Thu, 25 Sep 2025 13:46:00 +0200
+Subject: x86/microcode/AMD: Select which microcode patch to load
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 8d171045069c804e5ffaa18be590c42c6af0cf3f upstream.
+
+All microcode patches up to the proper BIOS Entrysign fix are loaded
+only after the sha256 signature carried in the driver has been verified.
+
+Microcode patches released after the Entrysign fix has been applied do
+not need that signature verification anymore.
+
+In order to not abandon machines which haven't received the BIOS update
+yet, add the capability to select which microcode patch to load.
+
+The corresponding microcode container supplied through linux-firmware
+has been modified to carry two patches per CPU type
+(family/model/stepping) so that the proper one gets selected.
+
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Tested-by: Waiman Long <longman@redhat.com>
+Link: https://patch.msgid.link/20251027133818.4363-1-bp@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+
+---
+ arch/x86/kernel/cpu/microcode/amd.c |  113 ++++++++++++++++++++++--------------
+ 1 file changed, 72 insertions(+), 41 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -186,50 +186,61 @@ static u32 cpuid_to_ucode_rev(unsigned i
+       return p.ucode_rev;
+ }
++static u32 get_cutoff_revision(u32 rev)
++{
++      switch (rev >> 8) {
++      case 0x80012: return 0x8001277; break;
++      case 0x80082: return 0x800820f; break;
++      case 0x83010: return 0x830107c; break;
++      case 0x86001: return 0x860010e; break;
++      case 0x86081: return 0x8608108; break;
++      case 0x87010: return 0x8701034; break;
++      case 0x8a000: return 0x8a0000a; break;
++      case 0xa0010: return 0xa00107a; break;
++      case 0xa0011: return 0xa0011da; break;
++      case 0xa0012: return 0xa001243; break;
++      case 0xa0082: return 0xa00820e; break;
++      case 0xa1011: return 0xa101153; break;
++      case 0xa1012: return 0xa10124e; break;
++      case 0xa1081: return 0xa108109; break;
++      case 0xa2010: return 0xa20102f; break;
++      case 0xa2012: return 0xa201212; break;
++      case 0xa4041: return 0xa404109; break;
++      case 0xa5000: return 0xa500013; break;
++      case 0xa6012: return 0xa60120a; break;
++      case 0xa7041: return 0xa704109; break;
++      case 0xa7052: return 0xa705208; break;
++      case 0xa7080: return 0xa708009; break;
++      case 0xa70c0: return 0xa70C009; break;
++      case 0xaa001: return 0xaa00116; break;
++      case 0xaa002: return 0xaa00218; break;
++      case 0xb0021: return 0xb002146; break;
++      case 0xb0081: return 0xb008111; break;
++      case 0xb1010: return 0xb101046; break;
++      case 0xb2040: return 0xb204031; break;
++      case 0xb4040: return 0xb404031; break;
++      case 0xb4041: return 0xb404101; break;
++      case 0xb6000: return 0xb600031; break;
++      case 0xb6080: return 0xb608031; break;
++      case 0xb7000: return 0xb700031; break;
++      default: break;
++
++      }
++      return 0;
++}
++
+ static bool need_sha_check(u32 cur_rev)
+ {
++      u32 cutoff;
++
+       if (!cur_rev) {
+               cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax);
+               pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev);
+       }
+-      switch (cur_rev >> 8) {
+-      case 0x80012: return cur_rev <= 0x8001277; break;
+-      case 0x80082: return cur_rev <= 0x800820f; break;
+-      case 0x83010: return cur_rev <= 0x830107c; break;
+-      case 0x86001: return cur_rev <= 0x860010e; break;
+-      case 0x86081: return cur_rev <= 0x8608108; break;
+-      case 0x87010: return cur_rev <= 0x8701034; break;
+-      case 0x8a000: return cur_rev <= 0x8a0000a; break;
+-      case 0xa0010: return cur_rev <= 0xa00107a; break;
+-      case 0xa0011: return cur_rev <= 0xa0011da; break;
+-      case 0xa0012: return cur_rev <= 0xa001243; break;
+-      case 0xa0082: return cur_rev <= 0xa00820e; break;
+-      case 0xa1011: return cur_rev <= 0xa101153; break;
+-      case 0xa1012: return cur_rev <= 0xa10124e; break;
+-      case 0xa1081: return cur_rev <= 0xa108109; break;
+-      case 0xa2010: return cur_rev <= 0xa20102f; break;
+-      case 0xa2012: return cur_rev <= 0xa201212; break;
+-      case 0xa4041: return cur_rev <= 0xa404109; break;
+-      case 0xa5000: return cur_rev <= 0xa500013; break;
+-      case 0xa6012: return cur_rev <= 0xa60120a; break;
+-      case 0xa7041: return cur_rev <= 0xa704109; break;
+-      case 0xa7052: return cur_rev <= 0xa705208; break;
+-      case 0xa7080: return cur_rev <= 0xa708009; break;
+-      case 0xa70c0: return cur_rev <= 0xa70C009; break;
+-      case 0xaa001: return cur_rev <= 0xaa00116; break;
+-      case 0xaa002: return cur_rev <= 0xaa00218; break;
+-      case 0xb0021: return cur_rev <= 0xb002146; break;
+-      case 0xb0081: return cur_rev <= 0xb008111; break;
+-      case 0xb1010: return cur_rev <= 0xb101046; break;
+-      case 0xb2040: return cur_rev <= 0xb204031; break;
+-      case 0xb4040: return cur_rev <= 0xb404031; break;
+-      case 0xb4041: return cur_rev <= 0xb404101; break;
+-      case 0xb6000: return cur_rev <= 0xb600031; break;
+-      case 0xb6080: return cur_rev <= 0xb608031; break;
+-      case 0xb7000: return cur_rev <= 0xb700031; break;
+-      default: break;
+-      }
++      cutoff = get_cutoff_revision(cur_rev);
++      if (cutoff)
++              return cur_rev <= cutoff;
+       pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n");
+       pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev);
+@@ -494,6 +505,7 @@ static int verify_patch(const u8 *buf, s
+ {
+       u8 family = x86_family(bsp_cpuid_1_eax);
+       struct microcode_header_amd *mc_hdr;
++      u32 cur_rev, cutoff, patch_rev;
+       u32 sh_psize;
+       u16 proc_id;
+       u8 patch_fam;
+@@ -533,11 +545,32 @@ static int verify_patch(const u8 *buf, s
+       proc_id = mc_hdr->processor_rev_id;
+       patch_fam = 0xf + (proc_id >> 12);
+-      ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam);
+-
+       if (patch_fam != family)
+               return 1;
++      cur_rev = get_patch_level();
++
++      /* No cutoff revision means old/unaffected by signing algorithm weakness => matches */
++      cutoff = get_cutoff_revision(cur_rev);
++      if (!cutoff)
++              goto ok;
++
++      patch_rev = mc_hdr->patch_id;
++
++      ucode_dbg("cur_rev: 0x%x, cutoff: 0x%x, patch_rev: 0x%x\n",
++                cur_rev, cutoff, patch_rev);
++
++      if (cur_rev <= cutoff && patch_rev <= cutoff)
++              goto ok;
++
++      if (cur_rev > cutoff && patch_rev > cutoff)
++              goto ok;
++
++      return 1;
++
++ok:
++      ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam);
++
+       return 0;
+ }
+@@ -606,8 +639,6 @@ static size_t parse_container(u8 *ucode,
+               mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE);
+-              ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id);
+-
+               if (mc_patch_matches(mc, eq_id)) {
+                       desc->psize = patch_size;
+                       desc->mc = mc;