git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
mm/hugetlb_vmemmap: fix incorrect vmemmap restore in rollback
author: Muchun Song <songmuchun@bytedance.com>
Mon, 25 May 2026 02:52:13 +0000 (10:52 +0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Mon, 1 Jun 2026 04:50:24 +0000 (21:50 -0700)
vmemmap_restore_pte() rebuilds restored vmemmap pages from a tail-page
template derived from compound_head().  This is wrong when the current PTE
already maps a page whose contents are not tail-page metadata.

In the rollback path of vmemmap_remap_free(), the first restored PTE is
backed by vmemmap_head and contains head-page metadata.  Reconstructing
that page from a tail-page template overwrites the head-page state and
corrupts the restored vmemmap page.

Fix this by copying the full page from the page currently mapped by the
PTE.  Also pass vmemmap_tail to the rollback walk so only PTEs backed by
the shared tail page are restored, while the head PTE remains mapped to
vmemmap_head.  Add VM_WARN_ON_ONCE() checks for unexpected cases.

Link: https://lore.kernel.org/20260525025213.2229628-1-songmuchun@bytedance.com
Fixes: c0b495b91a47 ("mm/hugetlb: refactor code around vmemmap_walk")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Kiryl Shutsemau <kas@kernel.org>
Acked-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb_vmemmap.c

index 4a077d231d3a2cb12d8ed56e8cefbfec41dcf5f5..133b46dfb09f5c8474941d9ac78fbea824aed4e0 100644 (file)
@@ -207,6 +207,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
 
        /* Remapping the head page requires r/w */
        if (unlikely(walk->nr_walked == 0 && walk->vmemmap_head)) {
+               VM_WARN_ON_ONCE(!PageHead((const struct page *)addr));
+
                list_del(&walk->vmemmap_head->lru);
 
                /*
@@ -218,6 +220,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
 
                entry = mk_pte(walk->vmemmap_head, PAGE_KERNEL);
        } else {
+               VM_WARN_ON_ONCE(!PageTail((const struct page *)addr));
+
                /*
                 * Remap the tail pages as read-only to catch illegal write
                 * operation to the tail pages.
@@ -232,33 +236,28 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
 static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
                                struct vmemmap_remap_walk *walk)
 {
-       struct page *page;
-       struct page *from, *to;
-
-       page = list_first_entry(walk->vmemmap_pages, struct page, lru);
-       list_del(&page->lru);
+       struct page *src = pte_page(ptep_get(pte)), *dst;
 
        /*
-        * Initialize tail pages in the newly allocated vmemmap page.
-        *
-        * There is folio-scope metadata that is encoded in the first few
-        * tail pages.
-        *
-        * Use the value last tail page in the page with the head page
-        * to initialize the rest of tail pages.
+        * When rolling back vmemmap_remap_free(), keep the copied head page
+        * mapping and restore only PTEs currently pointing at the shared tail
+        * page.
         */
-       from = compound_head((struct page *)addr) +
-               PAGE_SIZE / sizeof(struct page) - 1;
-       to = page_to_virt(page);
-       for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++, to++)
-               *to = *from;
+       if (walk->vmemmap_tail && walk->vmemmap_tail != src)
+               return;
+
+       VM_WARN_ON_ONCE(PageHead((const struct page *)addr));
+
+       dst = list_first_entry(walk->vmemmap_pages, struct page, lru);
+       list_del(&dst->lru);
+       copy_page(page_to_virt(dst), page_to_virt(src));
 
        /*
         * Makes sure that preceding stores to the page contents become visible
         * before the set_pte_at() write.
         */
        smp_wmb();
-       set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
+       set_pte_at(&init_mm, addr, pte, mk_pte(dst, PAGE_KERNEL));
 }
 
 /**
@@ -324,6 +323,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
         */
        walk = (struct vmemmap_remap_walk) {
                .remap_pte      = vmemmap_restore_pte,
+               .vmemmap_tail   = vmemmap_tail,
                .vmemmap_pages  = vmemmap_pages,
                .flags          = 0,
        };