mm/hugetlb: add pre-HVO framework
author     Frank van der Linden <fvdl@google.com>
           Fri, 28 Feb 2025 18:29:18 +0000 (18:29 +0000)
committer  Andrew Morton <akpm@linux-foundation.org>
           Mon, 17 Mar 2025 05:06:29 +0000 (22:06 -0700)
Define flags for pre-HVOed bootmem hugetlb pages, and act on them.

The most important flag is the HVO flag, signalling that a bootmem
allocated gigantic page has already been HVO-ed.  If this flag is seen by
the hugetlb bootmem gather code, the page is marked as HVO optimized.  The
HVO code will then not try to optimize it again.  Instead, it will just
map the tail page mirror pages read-only, completing the HVO steps.

No functional change, as nothing sets the flags yet.
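
As a minimal sketch only: a later pre-HVO pass might mark a bootmem page roughly as
below. The helper name hugetlb_bootmem_mark_prehvo is hypothetical (this patch defines
the flags but nothing sets them yet); it only combines the huge_bootmem_page fields and
flag bits introduced by this patch.

/*
 * Hypothetical helper, not part of this patch.  A pre-HVO pass that has
 * already remapped the vmemmap of a bootmem gigantic page, and has
 * verified which zone its range lands in, could mark the entry like this.
 * The gather code then skips the zone check and flags the folio as
 * vmemmap-optimized, so the HVO code only write-protects the mirrored
 * tail page structs instead of optimizing again.
 */
static void __init hugetlb_bootmem_mark_prehvo(struct huge_bootmem_page *m)
{
	m->flags |= HUGE_BOOTMEM_HVO | HUGE_BOOTMEM_ZONES_VALID;
}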

Link: https://lkml.kernel.org/r/20250228182928.2645936-18-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
arch/powerpc/mm/hugetlbpage.c
include/linux/hugetlb.h
mm/hugetlb.c
mm/hugetlb_vmemmap.c
mm/hugetlb_vmemmap.h

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6b043180220a7c232559fbc4045b85fc232c206a..d3c1b749dcfc885c1bb58114dd1df34dbef2009f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -113,6 +113,7 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
        gpage_freearray[nr_gpages] = 0;
        list_add(&m->list, &huge_boot_pages[0]);
        m->hstate = hstate;
+       m->flags = 0;
        return 1;
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f0ab4ca4ecf2a0ca193d17ca528fddb3ec4ad74f..bbccc3e6b9ddae3af9cfd823d362b772541d000d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -681,8 +681,12 @@ struct hstate {
 struct huge_bootmem_page {
        struct list_head list;
        struct hstate *hstate;
+       unsigned long flags;
 };
 
+#define HUGE_BOOTMEM_HVO               0x0001
+#define HUGE_BOOTMEM_ZONES_VALID       0x0002
+
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
 void wait_for_freed_hugetlb_folios(void);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f9287d87b8b7d175b8ba74ed2614accb99968ece..db0d35bc9b9b23bfe6c1179639ef60db8eb9daae 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3227,6 +3227,7 @@ found:
        INIT_LIST_HEAD(&m->list);
        list_add(&m->list, &huge_boot_pages[node]);
        m->hstate = h;
+       m->flags = 0;
        return 1;
 }
 
@@ -3294,7 +3295,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
        struct folio *folio, *tmp_f;
 
        /* Send list for bulk vmemmap optimization processing */
-       hugetlb_vmemmap_optimize_folios(h, folio_list);
+       hugetlb_vmemmap_optimize_bootmem_folios(h, folio_list);
 
        list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
                if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
@@ -3323,6 +3324,13 @@ static bool __init hugetlb_bootmem_page_zones_valid(int nid,
        unsigned long start_pfn;
        bool valid;
 
+       if (m->flags & HUGE_BOOTMEM_ZONES_VALID) {
+               /*
+                * Already validated, skip check.
+                */
+               return true;
+       }
+
        start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
 
        valid = !pfn_range_intersects_zones(nid, start_pfn,
@@ -3355,6 +3363,11 @@ static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page,
        }
 }
 
+static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m)
+{
+       return (m->flags & HUGE_BOOTMEM_HVO);
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3395,6 +3408,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
                hugetlb_folio_init_vmemmap(folio, h,
                                           HUGETLB_VMEMMAP_RESERVE_PAGES);
                init_new_hugetlb_folio(h, folio);
+
+               if (hugetlb_bootmem_page_prehvo(m))
+                       /*
+                        * If pre-HVO was done, just set the
+                        * flag, the HVO code will then skip
+                        * this folio.
+                        */
+                       folio_set_hugetlb_vmemmap_optimized(folio);
+
                list_add(&folio->lru, &folio_list);
 
                /*
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 5b484758f813e7b3b0dab7f66e29239a516e1d9f..be6b33ecbc8ece6eff5a87c837b5a89b57b5fa64 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -649,14 +649,39 @@ static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *fol
        return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse);
 }
 
-void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
+                                             struct list_head *folio_list,
+                                             bool boot)
 {
        struct folio *folio;
+       int nr_to_optimize;
        LIST_HEAD(vmemmap_pages);
        unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
 
+       nr_to_optimize = 0;
        list_for_each_entry(folio, folio_list, lru) {
-               int ret = hugetlb_vmemmap_split_folio(h, folio);
+               int ret;
+               unsigned long spfn, epfn;
+
+               if (boot && folio_test_hugetlb_vmemmap_optimized(folio)) {
+                       /*
+                        * Already optimized by pre-HVO, just map the
+                        * mirrored tail page structs RO.
+                        */
+                       spfn = (unsigned long)&folio->page;
+                       epfn = spfn + pages_per_huge_page(h);
+                       vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
+                                       HUGETLB_VMEMMAP_RESERVE_SIZE);
+                       register_page_bootmem_memmap(pfn_to_section_nr(spfn),
+                                       &folio->page,
+                                       HUGETLB_VMEMMAP_RESERVE_SIZE);
+                       static_branch_inc(&hugetlb_optimize_vmemmap_key);
+                       continue;
+               }
+
+               nr_to_optimize++;
+
+               ret = hugetlb_vmemmap_split_folio(h, folio);
 
                /*
                 * Spliting the PMD requires allocating a page, thus lets fail
@@ -668,6 +693,16 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
                        break;
        }
 
+       if (!nr_to_optimize)
+               /*
+                * All pre-HVO folios, nothing left to do. It's ok if
+                * there is a mix of pre-HVO and not yet HVO-ed folios
+                * here, as __hugetlb_vmemmap_optimize_folio() will
+                * skip any folios that already have the optimized flag
+                * set, see vmemmap_should_optimize_folio().
+                */
+               goto out;
+
        flush_tlb_all();
 
        list_for_each_entry(folio, folio_list, lru) {
@@ -693,10 +728,21 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
                }
        }
 
+out:
        flush_tlb_all();
        free_vmemmap_page_list(&vmemmap_pages);
 }
 
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+       __hugetlb_vmemmap_optimize_folios(h, folio_list, false);
+}
+
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list)
+{
+       __hugetlb_vmemmap_optimize_folios(h, folio_list, true);
+}
+
 static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
        {
                .procname       = "hugetlb_optimize_vmemmap",
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 2fcae92d33590c80f5e6ee949973a0ff441be345..71110a90275f60e3d4626052e65a25270c2ad226 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -24,6 +24,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
                                        struct list_head *non_hvo_folios);
 void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
+
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -64,6 +66,11 @@ static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list
 {
 }
 
+static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
+                                               struct list_head *folio_list)
+{
+}
+
 static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
 {
        return 0;