mm/vmalloc: Gracefully unmap huge ptes
author Ryan Roberts <ryan.roberts@arm.com>
Tue, 22 Apr 2025 08:18:16 +0000 (09:18 +0100)
committer Will Deacon <will@kernel.org>
Fri, 9 May 2025 12:43:07 +0000 (13:43 +0100)
Commit f7ee1f13d606 ("mm/vmalloc: enable mapping of huge pages at pte
level in vmap") added support for huge pte mappings in vmap by reusing
the set_huge_pte_at() API, which is otherwise only used for user
mappings. But when unmapping those huge ptes, it continued to call
ptep_get_and_clear(), which is a layering violation. To date, the only
arch to implement this support is powerpc, where it happens to work ok.

But arm64's implementation of ptep_get_and_clear() cannot be safely
used to clear a previous set_huge_pte_at(). So let's introduce a new
arch opt-in function, arch_vmap_pte_range_unmap_size(), which can
provide the size of a (present) pte. Then we can call
huge_ptep_get_and_clear() to tear it down properly.
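
For illustration only, an arch opt-in merely has to report how much
virtual address space a present pte maps. A minimal arm64-flavoured
sketch, assuming the arch's contiguous-pte helpers __ptep_get(),
pte_cont() and CONT_PTE_SIZE (and not necessarily matching the arm64
patch elsewhere in this series), could look like:

  #define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
  static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
                                                              pte_t *ptep)
  {
          /* Sketch: a pte carrying the contiguous hint maps a whole block. */
          if (pte_cont(__ptep_get(ptep)))
                  return CONT_PTE_SIZE;
          return PAGE_SIZE;
  }

The generic fallback added below keeps returning PAGE_SIZE, so existing
architectures see no behavioural change.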

Note that if vunmap_range() is called with a range that starts in the
middle of a huge pte-mapped page, we must unmap the entire huge page so
that the behaviour is consistent with pmd and pud block mappings. In
this case we emit a warning, just as we do for pmd/pud mappings.
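
As a concrete illustration of that fix-up (hypothetical addresses,
assuming 4KiB base pages and a 64KiB huge pte mapping):

  /*
   * A 64KiB huge pte mapping starts at 0xffff800010000000 and
   * vunmap_range() is handed addr = 0xffff800010004000. The unmap path
   * warns, rewinds to the start of the block and tears down all 64KiB:
   */
  addr = ALIGN_DOWN(addr, size);  /* -> 0xffff800010000000 */
  pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT));  /* block's first pte */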

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-9-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
include/linux/vmalloc.h
mm/vmalloc.c

index 31e9ffd936e39334ddaff910222d4751c18da5e7..16dd4cba64f2402e7942fac237026745183977ff 100644 (file)
@@ -113,6 +113,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns
 }
 #endif
 
+#ifndef arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+                                                          pte_t *ptep)
+{
+       return PAGE_SIZE;
+}
+#endif
+
 #ifndef arch_vmap_pte_supported_shift
 static inline int arch_vmap_pte_supported_shift(unsigned long size)
 {
index d60d3a29d149517b6c5f7aee5d79b89d62bc5c43..fe2e2cc8da94240d95c95fbd25cc39b498e09fdd 100644 (file)
@@ -350,12 +350,26 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                             pgtbl_mod_mask *mask)
 {
        pte_t *pte;
+       pte_t ptent;
+       unsigned long size = PAGE_SIZE;
 
        pte = pte_offset_kernel(pmd, addr);
        do {
-               pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+#ifdef CONFIG_HUGETLB_PAGE
+               size = arch_vmap_pte_range_unmap_size(addr, pte);
+               if (size != PAGE_SIZE) {
+                       if (WARN_ON(!IS_ALIGNED(addr, size))) {
+                               addr = ALIGN_DOWN(addr, size);
+                               pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT));
+                       }
+                       ptent = huge_ptep_get_and_clear(&init_mm, addr, pte, size);
+                       if (WARN_ON(end - addr < size))
+                               size = end - addr;
+               } else
+#endif
+                       ptent = ptep_get_and_clear(&init_mm, addr, pte);
                WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (pte += (size >> PAGE_SHIFT), addr += size, addr != end);
        *mask |= PGTBL_PTE_MODIFIED;
 }