}
static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
- unsigned long nr_pages)
+ unsigned long nr_pages, bool skip_hugetlb,
+ bool *skipped_hugetlb)
{
unsigned long i, end_pfn = start_pfn + nr_pages;
struct page *page;
if (PageReserved(page))
return false;
- if (PageHuge(page))
- return false;
+ /*
+ * Only consider ranges containing hugepages if those pages are
+ * smaller than the requested contiguous region. e.g.:
+ * Move 2MB pages to free up a 1GB range.
+ * Don't move 1GB pages to free up a 2MB range.
+ *
+ * This makes contiguous allocation more reliable when multiple
+ * hugepage sizes are in use, without causing needless movement.
+ */
+ if (PageHuge(page)) {
+ unsigned int order;
+
+ if (!IS_ENABLED(CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION))
+ return false;
+
+ if (skip_hugetlb) {
+ *skipped_hugetlb = true;
+ return false;
+ }
+
+ page = compound_head(page);
+ order = compound_order(page);
+ if ((order >= MAX_FOLIO_ORDER) ||
+ (nr_pages <= (1UL << order)))
+ return false;
+
+ /*
+ * Reaching this point means we've encountered a hugepage
+ * smaller than nr_pages, so skip all PFNs covered by it.
+ *
+ * We can't get here from a PageHuge tail page, as that would
+ * imply the scan started in the middle of a hugepage larger
+ * than nr_pages - a case the prior check already filters out.
+ */
+ i += (1UL << order) - 1;
+ }
}
return true;
}
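
As a rough illustration of the order check and the PFN skip above, here is a
minimal userspace sketch (not kernel code; the constants assume 4KB base
pages, so a 2MB hugepage is order 9, a 1GB hugepage is order 18, and a 1GB
request is 262144 pages):

	#include <stdio.h>

	/* Hypothetical stand-in for the check in pfn_range_valid_contig():
	 * a hugepage only blocks the range when it is at least as large as
	 * the requested region. */
	static int huge_page_blocks_range(unsigned int order, unsigned long nr_pages)
	{
		return nr_pages <= (1UL << order);
	}

	int main(void)
	{
		unsigned long nr_pages = 262144;	/* 1GB request */
		unsigned long i = 1024;			/* head PFN of a 2MB hugepage */
		unsigned int order = 9;

		if (!huge_page_blocks_range(order, nr_pages)) {
			/* Skip the hugepage's remaining PFNs; the loop's i++
			 * then lands on the first PFN after it. */
			i += (1UL << order) - 1;
			printf("2MB page inside a 1GB request: scan resumes at PFN %lu\n", i + 1);
		}

		/* The reverse case: a 1GB hugepage inside a 2MB (512-page)
		 * request rejects the range outright. */
		printf("1GB page blocks a 2MB request: %d\n",
		       huge_page_blocks_range(18, 512));
		return 0;
	}
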
struct zonelist *zonelist;
struct zone *zone;
struct zoneref *z;
+ bool skip_hugetlb = true;
+ bool skipped_hugetlb = false;
+retry:
zonelist = node_zonelist(nid, gfp_mask);
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(gfp_mask), nodemask) {
pfn = ALIGN(zone->zone_start_pfn, nr_pages);
while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
- if (pfn_range_valid_contig(zone, pfn, nr_pages)) {
+ if (pfn_range_valid_contig(zone, pfn, nr_pages,
+ skip_hugetlb,
+ &skipped_hugetlb)) {
/*
* We release the zone lock here because
* alloc_contig_range() will also lock the zone
}
spin_unlock_irqrestore(&zone->lock, flags);
}
+ /*
+ * If we failed, retry the search, but treat regions with HugeTLB pages
+ * as valid targets. This keeps the first pass fast, since it never
+ * tries to migrate HugeTLB pages (which may fail). On the second
+ * pass, we will try moving HugeTLB pages when those pages are
+ * smaller than the requested contiguous region size.
+ */
+ if (skip_hugetlb && skipped_hugetlb) {
+ skip_hugetlb = false;
+ goto retry;
+ }
return NULL;
}
#endif /* CONFIG_CONTIG_ALLOC */
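
The retry amounts to at most two passes over the zone list. Below is a minimal
userspace sketch of that control flow, with find_range() as a hypothetical
stand-in for the zone/PFN walk (not a kernel API); it pretends the first pass
always skips HugeTLB ranges and the second pass succeeds by migrating them:

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-in for the zone/PFN walk in alloc_contig_pages(). */
	static bool find_range(unsigned long nr_pages, bool skip_hugetlb, bool *skipped)
	{
		(void)nr_pages;
		if (skip_hugetlb) {
			*skipped = true;	/* saw HugeTLB, refused to touch it */
			return false;
		}
		return true;			/* second pass: migration allowed */
	}

	static bool alloc_contig_sketch(unsigned long nr_pages)
	{
		bool skip_hugetlb = true;
		bool skipped_hugetlb = false;

	retry:
		if (find_range(nr_pages, skip_hugetlb, &skipped_hugetlb))
			return true;

		/* The first pass failed only because HugeTLB ranges were
		 * skipped: retry once, this time allowing their migration. */
		if (skip_hugetlb && skipped_hugetlb) {
			skip_hugetlb = false;
			goto retry;
		}
		return false;
	}

	int main(void)
	{
		printf("allocation %s on the second pass\n",
		       alloc_contig_sketch(262144) ? "succeeded" : "failed");
		return 0;
	}
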