Merge tag 'mm-stable-2023-02-20-13-37' of git://git.kernel.org/pub/scm/linux/kernel...

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1b791b26d72d7aa678512a40488beab342c2648b..4fc43859e59a31932a657cd2fac2b511c00e812b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -119,7 +119,8 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
         * own flags.
         */
        if (!in_pf && shmem_file(vma->vm_file))
-               return shmem_huge_enabled(vma, !enforce_sysfs);
+               return shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff,
+                                    !enforce_sysfs, vma->vm_mm, vm_flags);
 
        /* Enforce sysfs THP requirements as necessary */
        if (enforce_sysfs &&
@@ -559,10 +560,11 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
 {
-       struct mem_cgroup *memcg = page_memcg(compound_head(page));
-       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+       struct mem_cgroup *memcg = folio_memcg(folio);
+       struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
 
        if (memcg)
                return &memcg->deferred_split_queue;
@@ -570,9 +572,10 @@ static inline struct deferred_split *get_deferred_split_queue(struct page *page)
                return &pgdat->deferred_split_queue;
 }
 #else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
 {
-       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+       struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
 
        return &pgdat->deferred_split_queue;
 }
@@ -580,23 +583,23 @@ static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 
 void prep_transhuge_page(struct page *page)
 {
-       /*
-        * we use page->mapping and page->index in second tail page
-        * as list_head: assuming THP order >= 2
-        */
+       struct folio *folio = (struct folio *)page;
 
-       INIT_LIST_HEAD(page_deferred_list(page));
+       VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
+       INIT_LIST_HEAD(&folio->_deferred_list);
        set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 }
 
 static inline bool is_transparent_hugepage(struct page *page)
 {
+       struct folio *folio;
+
        if (!PageCompound(page))
                return false;
 
-       page = compound_head(page);
-       return is_huge_zero_page(page) ||
-              page[1].compound_dtor == TRANSHUGE_PAGE_DTOR;
+       folio = page_folio(page);
+       return is_huge_zero_page(&folio->page) ||
+              folio->_folio_dtor == TRANSHUGE_PAGE_DTOR;
 }
 
 static unsigned long __thp_get_unmapped_area(struct file *filp,
@@ -1039,11 +1042,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
        assert_spin_locked(pmd_lockptr(mm, pmd));
 
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-                        (FOLL_PIN | FOLL_GET)))
-               return NULL;
-
        if (flags & FOLL_WRITE && !pmd_write(*pmd))
                return NULL;
 
@@ -1202,11 +1200,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
        if (flags & FOLL_WRITE && !pud_write(*pud))
                return NULL;
 
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-                        (FOLL_PIN | FOLL_GET)))
-               return NULL;
-
        if (pud_present(*pud) && pud_devmap(*pud))
                /* pass */;
        else
@@ -1603,7 +1596,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
        spinlock_t *ptl;
        pmd_t orig_pmd;
-       struct page *page;
+       struct folio *folio;
        struct mm_struct *mm = tlb->mm;
        bool ret = false;
 
@@ -1623,15 +1616,15 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                goto out;
        }
 
-       page = pmd_page(orig_pmd);
+       folio = pfn_folio(pmd_pfn(orig_pmd));
        /*
-        * If other processes are mapping this page, we couldn't discard
-        * the page unless they all do MADV_FREE so let's skip the page.
+        * If other processes are mapping this folio, we couldn't discard
+        * the folio unless they all do MADV_FREE so let's skip the folio.
         */
-       if (total_mapcount(page) != 1)
+       if (folio_mapcount(folio) != 1)
                goto out;
 
-       if (!trylock_page(page))
+       if (!folio_trylock(folio))
                goto out;
 
        /*
@@ -1639,17 +1632,17 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         * will deactivate only them.
         */
        if (next - addr != HPAGE_PMD_SIZE) {
-               get_page(page);
+               folio_get(folio);
                spin_unlock(ptl);
-               split_huge_page(page);
-               unlock_page(page);
-               put_page(page);
+               split_folio(folio);
+               folio_unlock(folio);
+               folio_put(folio);
                goto out_unlocked;
        }
 
-       if (PageDirty(page))
-               ClearPageDirty(page);
-       unlock_page(page);
+       if (folio_test_dirty(folio))
+               folio_clear_dirty(folio);
+       folio_unlock(folio);
 
        if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
                pmdp_invalidate(vma, addr, pmd);
@@ -1660,7 +1653,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
        }
 
-       mark_page_lazyfree(page);
+       folio_mark_lazyfree(folio);
        ret = true;
 out:
        spin_unlock(ptl);
@@ -1920,17 +1913,15 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
 
        entry = pmd_modify(oldpmd, newprot);
-       if (uffd_wp) {
-               entry = pmd_wrprotect(entry);
+       if (uffd_wp)
                entry = pmd_mkuffd_wp(entry);
-       } else if (uffd_wp_resolve) {
+       else if (uffd_wp_resolve)
                /*
                 * Leave the write bit to be handled by PF interrupt
                 * handler, then things like COW could be properly
                 * handled.
                 */
                entry = pmd_clear_uffd_wp(entry);
-       }
 
        /* See change_pte_range(). */
        if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
@@ -2022,7 +2013,7 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
        spinlock_t *ptl;
        struct mmu_notifier_range range;
 
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
                                address & HPAGE_PUD_MASK,
                                (address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
@@ -2284,7 +2275,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        spinlock_t *ptl;
        struct mmu_notifier_range range;
 
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
                                address & HPAGE_PMD_MASK,
                                (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
@@ -2479,9 +2470,9 @@ static void __split_huge_page_tail(struct page *head, int tail,
         * of swap cache pages that store the swp_entry_t in tail pages.
         * Fix up and warn once if private is unexpectedly set.
         *
-        * What of 32-bit systems, on which head[1].compound_pincount overlays
+        * What of 32-bit systems, on which folio->_pincount overlays
         * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
-        * compound_pincount must be 0 for folio_ref_freeze() to have succeeded.
+        * pincount must be 0 for folio_ref_freeze() to have succeeded.
         */
        if (!folio_test_swapcache(page_folio(head))) {
                VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
@@ -2652,7 +2643,7 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
        struct folio *folio = page_folio(page);
-       struct deferred_split *ds_queue = get_deferred_split_queue(&folio->page);
+       struct deferred_split *ds_queue = get_deferred_split_queue(folio);
        XA_STATE(xas, &folio->mapping->i_pages, folio->index);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
@@ -2756,9 +2747,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        /* Prevent deferred_split_scan() touching ->_refcount */
        spin_lock(&ds_queue->split_queue_lock);
        if (folio_ref_freeze(folio, 1 + extra_pins)) {
-               if (!list_empty(page_deferred_list(&folio->page))) {
+               if (!list_empty(&folio->_deferred_list)) {
                        ds_queue->split_queue_len--;
-                       list_del(page_deferred_list(&folio->page));
+                       list_del(&folio->_deferred_list);
                }
                spin_unlock(&ds_queue->split_queue_lock);
                if (mapping) {
@@ -2802,49 +2793,53 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-       struct deferred_split *ds_queue = get_deferred_split_queue(page);
+       struct folio *folio = (struct folio *)page;
+       struct deferred_split *ds_queue = get_deferred_split_queue(folio);
        unsigned long flags;
 
        spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
-       if (!list_empty(page_deferred_list(page))) {
+       if (!list_empty(&folio->_deferred_list)) {
                ds_queue->split_queue_len--;
-               list_del(page_deferred_list(page));
+               list_del(&folio->_deferred_list);
        }
        spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
        free_compound_page(page);
 }
 
-void deferred_split_huge_page(struct page *page)
+void deferred_split_folio(struct folio *folio)
 {
-       struct deferred_split *ds_queue = get_deferred_split_queue(page);
+       struct deferred_split *ds_queue = get_deferred_split_queue(folio);
 #ifdef CONFIG_MEMCG
-       struct mem_cgroup *memcg = page_memcg(compound_head(page));
+       struct mem_cgroup *memcg = folio_memcg(folio);
 #endif
        unsigned long flags;
 
-       VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+       VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
 
        /*
         * The try_to_unmap() in page reclaim path might reach here too,
         * this may cause a race condition to corrupt deferred split queue.
-        * And, if page reclaim is already handling the same page, it is
+        * And, if page reclaim is already handling the same folio, it is
         * unnecessary to handle it again in shrinker.
         *
-        * Check PageSwapCache to determine if the page is being
-        * handled by page reclaim since THP swap would add the page into
+        * Check the swapcache flag to determine if the folio is being
+        * handled by page reclaim since THP swap would add the folio into
         * swap cache before calling try_to_unmap().
         */
-       if (PageSwapCache(page))
+       if (folio_test_swapcache(folio))
+               return;
+
+       if (!list_empty(&folio->_deferred_list))
                return;
 
        spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
-       if (list_empty(page_deferred_list(page))) {
+       if (list_empty(&folio->_deferred_list)) {
                count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-               list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+               list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
                ds_queue->split_queue_len++;
 #ifdef CONFIG_MEMCG
                if (memcg)
-                       set_shrinker_bit(memcg, page_to_nid(page),
+                       set_shrinker_bit(memcg, folio_nid(folio),
                                         deferred_split_shrinker.id);
 #endif
        }
@@ -2870,8 +2865,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
        struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
        unsigned long flags;
-       LIST_HEAD(list), *pos, *next;
-       struct page *page;
+       LIST_HEAD(list);
+       struct folio *folio, *next;
        int split = 0;
 
 #ifdef CONFIG_MEMCG
@@ -2881,14 +2876,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
        spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
-       list_for_each_safe(pos, next, &ds_queue->split_queue) {
-               page = list_entry((void *)pos, struct page, deferred_list);
-               page = compound_head(page);
-               if (get_page_unless_zero(page)) {
-                       list_move(page_deferred_list(page), &list);
+       list_for_each_entry_safe(folio, next, &ds_queue->split_queue,
+                                                       _deferred_list) {
+               if (folio_try_get(folio)) {
+                       list_move(&folio->_deferred_list, &list);
                } else {
-                       /* We lost race with put_compound_page() */
-                       list_del_init(page_deferred_list(page));
+                       /* We lost race with folio_put() */
+                       list_del_init(&folio->_deferred_list);
                        ds_queue->split_queue_len--;
                }
                if (!--sc->nr_to_scan)
@@ -2896,16 +2890,15 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
        }
        spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
-       list_for_each_safe(pos, next, &list) {
-               page = list_entry((void *)pos, struct page, deferred_list);
-               if (!trylock_page(page))
+       list_for_each_entry_safe(folio, next, &list, _deferred_list) {
+               if (!folio_trylock(folio))
                        goto next;
                /* split_huge_page() removes page from list on success */
-               if (!split_huge_page(page))
+               if (!split_folio(folio))
                        split++;
-               unlock_page(page);
+               folio_unlock(folio);
 next:
-               put_page(page);
+               folio_put(folio);
        }
 
        spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
@@ -2934,6 +2927,7 @@ static void split_huge_pages_all(void)
 {
        struct zone *zone;
        struct page *page;
+       struct folio *folio;
        unsigned long pfn, max_zone_pfn;
        unsigned long total = 0, split = 0;
 
@@ -2946,24 +2940,32 @@ static void split_huge_pages_all(void)
                        int nr_pages;
 
                        page = pfn_to_online_page(pfn);
-                       if (!page || !get_page_unless_zero(page))
+                       if (!page || PageTail(page))
+                               continue;
+                       folio = page_folio(page);
+                       if (!folio_try_get(folio))
                                continue;
 
-                       if (zone != page_zone(page))
+                       if (unlikely(page_folio(page) != folio))
                                goto next;
 
-                       if (!PageHead(page) || PageHuge(page) || !PageLRU(page))
+                       if (zone != folio_zone(folio))
+                               goto next;
+
+                       if (!folio_test_large(folio)
+                               || folio_test_hugetlb(folio)
+                               || !folio_test_lru(folio))
                                goto next;
 
                        total++;
-                       lock_page(page);
-                       nr_pages = thp_nr_pages(page);
-                       if (!split_huge_page(page))
+                       folio_lock(folio);
+                       nr_pages = folio_nr_pages(folio);
+                       if (!split_folio(folio))
                                split++;
                        pfn += nr_pages - 1;
-                       unlock_page(page);
+                       folio_unlock(folio);
 next:
-                       put_page(page);
+                       folio_put(folio);
                        cond_resched();
                }
        }
@@ -3273,7 +3275,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
        if (pmd_swp_soft_dirty(*pvmw->pmd))
                pmde = pmd_mksoft_dirty(pmde);
        if (pmd_swp_uffd_wp(*pvmw->pmd))
-               pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
+               pmde = pmd_mkuffd_wp(pmde);
        if (!is_migration_entry_young(entry))
                pmde = pmd_mkold(pmde);
        /* NOTE: this may contain setting soft-dirty on some archs */