git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
hugetlb: unshare some PMDs when splitting VMAs
author    James Houghton <jthoughton@google.com>
          Wed, 4 Jan 2023 23:19:10 +0000 (23:19 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 27 Jun 2025 10:04:21 +0000 (11:04 +0100)
commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d upstream.

PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
it is possible that HugeTLB VMAs are split without unsharing the PMDs
first.
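
As a rough illustration (not part of the patch; the constants below
assume x86_64 with 2 MiB huge pages, where PUD_SIZE is 1 GiB), a split
address can pass the huge-page alignment check in hugetlb_vm_op_split()
and still destroy the PUD_SIZE alignment that sharing depends on:

  /* Illustration only: hugepage-aligned is not PUD_SIZE-aligned. */
  #include <stdio.h>

  #define HPAGE_SIZE   (2UL << 20)              /* 2 MiB huge page */
  #define PUD_SIZE     (1UL << 30)              /* 1 GiB */
  #define PUD_MASK     (~(PUD_SIZE - 1))

  int main(void)
  {
          unsigned long vm_start = 0x40000000UL;  /* PUD_SIZE-aligned */
          unsigned long vm_end   = vm_start + 2 * PUD_SIZE;
          unsigned long addr     = vm_start + PUD_SIZE + 64 * HPAGE_SIZE;
          unsigned long floor    = addr & PUD_MASK;
          unsigned long ceil     = floor + PUD_SIZE;

          /* hugepage-aligned, so hugetlb_vm_op_split() accepts the split */
          printf("hugepage aligned: %d\n", (addr & (HPAGE_SIZE - 1)) == 0);
          /* ...but not PUD_SIZE-aligned, so PMDs shared in the PUD_SIZE
           * interval around addr must be unshared before the VMA is cut */
          printf("PUD_SIZE aligned: %d\n", (addr & ~PUD_MASK) == 0);
          if (floor >= vm_start && ceil <= vm_end)
                  printf("unshare [%#lx, %#lx)\n", floor, ceil);
          return 0;
  }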

Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
in hugetlb_change_protection [1].  The key there is that
hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
non-PUD_SIZE-aligned sections of the VMA.
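
For reference, the clamping in hugetlb_unshare_all_pmds() looks roughly
like this (a paraphrased sketch of the code added by 6dfeaff93be1, not a
verbatim quote):

  /* Paraphrased sketch; kernel-side code, not standalone. */
  void hugetlb_unshare_all_pmds_sketch(struct vm_area_struct *vma)
  {
          /* only the PUD_SIZE-aligned interior of the VMA is walked */
          unsigned long start = ALIGN(vma->vm_start, PUD_SIZE);
          unsigned long end   = ALIGN_DOWN(vma->vm_end, PUD_SIZE);

          /* a VMA that lost PUD_SIZE alignment through an earlier split
           * is skipped entirely, so its PMDs stay shared and the later
           * uffd-wp protection change hits the WARN_ON_ONCE */
          if (start >= end)
                  return;

          /* ... walk [start, end) in PUD_SIZE steps, huge_pmd_unshare() ... */
  }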

It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
seems natural.
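
For context, the ->split() hook fires in __split_vma() before the new
VMA is created, which is why unsharing there covers what becomes both
halves (paraphrased sketch of mm/mmap.c, not a verbatim quote):

  /* in __split_vma(), before the new vm_area_struct is set up: */
  if (vma->vm_ops && vma->vm_ops->split) {
          err = vma->vm_ops->split(vma, addr);    /* hugetlb_vm_op_split() */
          if (err)
                  return err;
  }
  /* only afterwards is the new VMA allocated and the range adjusted */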

[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/

Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com
Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[backport notes: I believe the "Fixes" tag is somewhat wrong - kernels
before that commit already had an adjust_range_if_pmd_sharing_possible()
that assumes that shared PMDs can't straddle page table boundaries.
huge_pmd_unshare() takes a different parameter type; see the prototype
note below the tags]
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
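
The parameter-type difference mentioned in the backport note, roughly
(prototypes reproduced from memory, verify against the respective trees):

  /* upstream, where this commit landed:
   *   int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
   *                        unsigned long addr, pte_t *ptep);
   *
   * this stable branch (older API, address passed by pointer, which is
   * why the backported hugetlb_unshare_pmds() below passes &address):
   *   int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
   *                        unsigned long *addr, pte_t *ptep);
   */
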
mm/hugetlb.c

index 02b7c8f9b0e871447b5b36fe819831436246ae9f..1be0d9a88e6c5f71dd89c7675a5f459e1312b842 100644 (file)
@@ -96,6 +96,8 @@ static inline void ClearPageHugeFreed(struct page *head)
 
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+               unsigned long start, unsigned long end);
 
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
@@ -3697,6 +3699,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 {
        if (addr & ~(huge_page_mask(hstate_vma(vma))))
                return -EINVAL;
+
+       /*
+        * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+        * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+        * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+        */
+       if (addr & ~PUD_MASK) {
+               /*
+                * hugetlb_vm_op_split is called right before we attempt to
+                * split the VMA. We will need to unshare PMDs in the old and
+                * new VMAs, so let's unshare before we split.
+                */
+               unsigned long floor = addr & PUD_MASK;
+               unsigned long ceil = floor + PUD_SIZE;
+
+               if (floor >= vma->vm_start && ceil <= vma->vm_end)
+                       hugetlb_unshare_pmds(vma, floor, ceil);
+       }
+
        return 0;
 }
 
@@ -5706,6 +5727,50 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
        }
 }
 
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+                                  unsigned long start,
+                                  unsigned long end)
+{
+       struct hstate *h = hstate_vma(vma);
+       unsigned long sz = huge_page_size(h);
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_notifier_range range;
+       unsigned long address;
+       spinlock_t *ptl;
+       pte_t *ptep;
+
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+
+       if (start >= end)
+               return;
+
+       flush_cache_range(vma, start, end);
+       /*
+        * No need to call adjust_range_if_pmd_sharing_possible(), because
+        * we have already done the PUD_SIZE alignment.
+        */
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+                               start, end);
+       mmu_notifier_invalidate_range_start(&range);
+       i_mmap_lock_write(vma->vm_file->f_mapping);
+       for (address = start; address < end; address += PUD_SIZE) {
+               ptep = huge_pte_offset(mm, address, sz);
+               if (!ptep)
+                       continue;
+               ptl = huge_pte_lock(h, mm, ptep);
+               huge_pmd_unshare(mm, vma, &address, ptep);
+               spin_unlock(ptl);
+       }
+       flush_hugetlb_tlb_range(vma, start, end);
+       i_mmap_unlock_write(vma->vm_file->f_mapping);
+       /*
+        * No need to call mmu_notifier_invalidate_range(), see
+        * Documentation/mm/mmu_notifier.rst.
+        */
+       mmu_notifier_invalidate_range_end(&range);
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;