mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop
author Lance Yang <ioworker0@gmail.com>
Fri, 14 Jun 2024 01:51:37 +0000 (09:51 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 4 Jul 2024 02:30:08 +0000 (19:30 -0700)
In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
folios, start the pagewalk first, then call split_huge_pmd_address() to
split the folio.
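
In short, the pre-walk split is dropped and the split now happens inside the
walk itself, under the PMD lock that page_vma_mapped_walk() already holds.  A
condensed excerpt of the new loop in try_to_unmap_one(), taken from the
mm/rmap.c hunk below with unrelated handling elided:

	while (page_vma_mapped_walk(&pvmw)) {
		/* ... mlock handling elided ... */

		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
			/*
			 * PMD-mapped entry: split it in place, then drop the
			 * PTL and restart from the now-PTE-mapped page table.
			 */
			split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
					      false, folio);
			flags &= ~TTU_SPLIT_HUGE_PMD;
			page_vma_mapped_walk_restart(&pvmw);
			continue;
		}

		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_FOLIO(!pvmw.pte, folio);

		/* ... PTE-level unmap continues as before ... */
	}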

Link: https://lkml.kernel.org/r/20240614015138.31461-3-ioworker0@gmail.com
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
include/linux/rmap.h
mm/huge_memory.c
mm/rmap.c

index 7ad41de5eaea40011d9031f585f6aff5dab91d57..9f720b0731c42fcc5398a0a4f636c8a3e0a61ba2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -428,6 +428,9 @@ static inline bool thp_migration_supported(void)
        return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+                          pmd_t *pmd, bool freeze, struct folio *folio);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -490,6 +493,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
                unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+                                        unsigned long address, pmd_t *pmd,
+                                        bool freeze, struct folio *folio) {}
 
 #define split_huge_pud(__vma, __pmd, __address)        \
        do { } while (0)
index bb53e5920b88bbffb8dd1efdf8ec2ccc45462030..bf46787c8eba66835a873b4b77de751539a76586 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -703,6 +703,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
                spin_unlock(pvmw->ptl);
 }
 
+/**
+ * page_vma_mapped_walk_restart - Restart the page table walk.
+ * @pvmw: Pointer to struct page_vma_mapped_walk.
+ *
+ * It restarts the page table walk when changes occur in the page
+ * table, such as splitting a PMD. Ensures that the PTL held during
+ * the previous walk is released and resets the state to allow for
+ * a new walk starting at the current address stored in pvmw->address.
+ */
+static inline void
+page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
+{
+       WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);
+
+       if (likely(pvmw->ptl))
+               spin_unlock(pvmw->ptl);
+       else
+               WARN_ON_ONCE(1);
+
+       pvmw->ptl = NULL;
+       pvmw->pmd = NULL;
+       pvmw->pte = NULL;
+}
+
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
 
 /*
index 6b294c9160de17ece7ee358155113dfbaaa505f1..b247490a093129d193a6573310e3bc9de3ad124e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2583,6 +2583,27 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        pmd_populate(mm, pmd, pgtable);
 }
 
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+                          pmd_t *pmd, bool freeze, struct folio *folio)
+{
+       VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
+       VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
+       VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
+       VM_BUG_ON(freeze && !folio);
+
+       /*
+        * When the caller requests to set up a migration entry, we
+        * require a folio to check the PMD against. Otherwise, there
+        * is a risk of replacing the wrong folio.
+        */
+       if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
+           is_pmd_migration_entry(*pmd)) {
+               if (folio && folio != pmd_folio(*pmd))
+                       return;
+               __split_huge_pmd_locked(vma, pmd, address, freeze);
+       }
+}
+
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio)
 {
@@ -2594,26 +2615,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                                (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
        ptl = pmd_lock(vma->vm_mm, pmd);
-
-       /*
-        * If caller asks to setup a migration entry, we need a folio to check
-        * pmd against. Otherwise we can end up replacing wrong folio.
-        */
-       VM_BUG_ON(freeze && !folio);
-       VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
-
-       if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
-           is_pmd_migration_entry(*pmd)) {
-               /*
-                * It's safe to call pmd_page when folio is set because it's
-                * guaranteed that pmd is present.
-                */
-               if (folio && folio != pmd_folio(*pmd))
-                       goto out;
-               __split_huge_pmd_locked(vma, pmd, range.start, freeze);
-       }
-
-out:
+       split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(&range);
 }
index cf97a0f17752c2dea4492e10fc5ad3ad093ae289..42ffdcdfcd278c9d84539a262ae0d9ee7c942acd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1642,9 +1642,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
        if (flags & TTU_SYNC)
                pvmw.flags = PVMW_SYNC;
 
-       if (flags & TTU_SPLIT_HUGE_PMD)
-               split_huge_pmd_address(vma, address, false, folio);
-
        /*
         * For THP, we have to assume the worse case ie pmd for invalidation.
         * For hugetlb, it could be much worse if we need to do pud
@@ -1670,9 +1667,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
-               /* Unexpected PMD-mapped THP? */
-               VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
                /*
                 * If the folio is in an mlock()d vma, we must not swap it out.
                 */
@@ -1684,6 +1678,21 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                        goto walk_abort;
                }
 
+               if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
+                       /*
+                        * We temporarily have to drop the PTL and start once
+                        * again from that now-PTE-mapped page table.
+                        */
+                       split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+                                             false, folio);
+                       flags &= ~TTU_SPLIT_HUGE_PMD;
+                       page_vma_mapped_walk_restart(&pvmw);
+                       continue;
+               }
+
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
                pfn = pte_pfn(ptep_get(pvmw.pte));
                subpage = folio_page(folio, pfn - folio_pfn(folio));
                address = pvmw.address;