mm/memory-failure: improve large block size folio handling
author    Zi Yan <ziy@nvidia.com>
          Fri, 31 Oct 2025 16:20:00 +0000 (12:20 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
          Mon, 24 Nov 2025 23:08:49 +0000 (15:08 -0800)
Large block size (LBS) folios cannot be split to order-0 folios, only down
to min_order_for_split().  The current code fails the split outright, which
is not optimal.  Split the folio to min_order_for_split() instead, so that,
after the split, only the folio containing the poisoned page becomes
unusable.
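
For illustration, a minimal userspace sketch (not kernel code) of that
decision; min_order_for_split_stub() and handle_large_folio() are
hypothetical stand-ins for the kernel helpers used in the diff below:

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-in for min_order_for_split(): 0 for a regular
	 * THP, the filesystem's minimum folio order for an LBS folio. */
	static unsigned int min_order_for_split_stub(unsigned int fs_min_order)
	{
		return fs_min_order;
	}

	/*
	 * Mirrors the new memory_failure() branch: always split to the
	 * minimum order to limit the damage, but since memory-failure cannot
	 * recover a still-large folio, any non-zero target order is still
	 * reported as a failure.
	 */
	static int handle_large_folio(unsigned int fs_min_order, bool split_err)
	{
		unsigned int new_order = min_order_for_split_stub(fs_min_order);

		if (split_err || new_order) {
			printf("min order %u: poison stays on a large folio, kill procs\n",
			       new_order);
			return -1;	/* -EHWPOISON in the kernel */
		}
		printf("split to order-0: only a single page is lost\n");
		return 0;
	}

	int main(void)
	{
		handle_large_folio(0, false);	/* regular THP */
		handle_large_folio(4, false);	/* LBS folio, min order 4 */
		return 0;
	}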

For soft offline, do not split the large folio if its
min_order_for_split() is not 0, since the folio is still accessible from
userspace and a premature split might cause a performance loss.
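
A matching sketch of the soft-offline choice, under the same assumptions
(soft_offline_large_folio() and the stub are hypothetical, not kernel API):
split only when the folio can reach order-0, otherwise keep the large folio
intact:

	#include <stdio.h>

	/* Same hypothetical stand-in for min_order_for_split() as above. */
	static unsigned int min_order_for_split_stub(unsigned int fs_min_order)
	{
		return fs_min_order;
	}

	/*
	 * Mirrors soft_offline_in_use_page(): the data is still good, so a
	 * folio that cannot be split all the way to order-0 is left alone
	 * rather than being split to a smaller large order.
	 */
	static int soft_offline_large_folio(unsigned int fs_min_order)
	{
		if (min_order_for_split_stub(fs_min_order)) {
			printf("min order %u != 0: keep the large folio, report busy\n",
			       fs_min_order);
			return -1;	/* -EBUSY in the kernel */
		}
		printf("split to order-0 and soft-offline a single page\n");
		return 0;
	}

	int main(void)
	{
		soft_offline_large_folio(0);	/* regular THP */
		soft_offline_large_folio(4);	/* LBS folio, min order 4 */
		return 0;
	}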

Link: https://lkml.kernel.org/r/20251031162001.670503-3-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Suggested-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Pankaj Raghav <kernel@pankajraghav.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 77391b6f9f7634a4042f2a278395237971715675..1f7fb9bf287a8e3f3dd17b7ce9310a232a487b38 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1659,12 +1659,13 @@ static int identify_page_state(unsigned long pfn, struct page *p,
  * there is still more to do, hence the page refcount we took earlier
  * is still needed.
  */
-static int try_to_split_thp_page(struct page *page, bool release)
+static int try_to_split_thp_page(struct page *page, unsigned int new_order,
+               bool release)
 {
        int ret;
 
        lock_page(page);
-       ret = split_huge_page(page);
+       ret = split_huge_page_to_order(page, new_order);
        unlock_page(page);
 
        if (ret && release)
@@ -2420,6 +2421,9 @@ try_again:
        folio_unlock(folio);
 
        if (folio_test_large(folio)) {
+               const int new_order = min_order_for_split(folio);
+               int err;
+
                /*
                 * The flag must be set after the refcount is bumped
                 * otherwise it may race with THP split.
@@ -2434,7 +2438,16 @@ try_again:
                 * page is a valid handlable page.
                 */
                folio_set_has_hwpoisoned(folio);
-               if (try_to_split_thp_page(p, false) < 0) {
+               err = try_to_split_thp_page(p, new_order, /* release= */ false);
+               /*
+                * If splitting a folio to order-0 fails, kill the process.
+                * Split the folio regardless to minimize unusable pages.
+                * Because the memory failure code cannot handle large
+                * folios, this split is always treated as if it failed.
+                */
+               if (err || new_order) {
+                       /* get folio again in case the original one is split */
+                       folio = page_folio(p);
                        res = -EHWPOISON;
                        kill_procs_now(p, pfn, flags, folio);
                        put_page(p);
@@ -2761,7 +2774,17 @@ static int soft_offline_in_use_page(struct page *page)
        };
 
        if (!huge && folio_test_large(folio)) {
-               if (try_to_split_thp_page(page, true)) {
+               const int new_order = min_order_for_split(folio);
+
+               /*
+                * If new_order (target split order) is not 0, do not split the
+                * folio at all to retain the still accessible large folio.
+                * NOTE: if minimizing the number of soft offline pages is
+                * preferred, split it to non-zero new_order like it is done in
+                * memory_failure().
+                */
+               if (new_order || try_to_split_thp_page(page, /* new_order= */ 0,
+                                                      /* release= */ true)) {
                        pr_info("%#lx: thp split failed\n", pfn);
                        return -EBUSY;
                }