mm/huge_memory: simplify vma_is_special_huge()
author     Lorenzo Stoakes (Oracle) <ljs@kernel.org>
           Fri, 20 Mar 2026 18:07:18 +0000 (18:07 +0000)
committer  Andrew Morton <akpm@linux-foundation.org>
           Sun, 5 Apr 2026 20:53:45 +0000 (13:53 -0700)
Patch series "mm/huge_memory: refactor zap_huge_pmd()", v3.

zap_huge_pmd() is overly complicated, clean it up and also add an assert
in the case that we encounter a buggy PMD entry that doesn't match
expectations.

This is motivated by a bug discovered [0] in zap_huge_pmd(), where the
PMD entry was none of:

* A non-DAX PFN or mixed map
* The huge zero folio
* A present PMD entry
* A softleaf entry

yet due to the bug we managed to reach this code anyway.

It is useful to explicitly call this out rather than have an arbitrary
NULL pointer dereference happen, which also improves understanding of
what's going on.
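
For illustration, the assert takes roughly this shape in the
unexpected-entry branch (a sketch only - the exact condition, placement
and message are assumptions here, not the series code verbatim):

	/*
	 * Sketch: the PMD entry matched none of the expected cases
	 * above, so warn loudly rather than stumble into an arbitrary
	 * NULL pointer dereference.
	 */
	VM_WARN_ON_ONCE(1);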

The series goes further to make use of vm_normal_folio_pmd() rather than
implementing custom logic for retrieving the folio, and extends softleaf
functionality to provide and use an equivalent softleaf function.
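
As a rough sketch of that direction (vm_normal_folio_pmd() is the
existing generic helper; the actual conversion happens in later patches
of the series, so the surrounding variable names are assumed):

	/*
	 * Retrieve the folio via the generic helper rather than
	 * open-coded pmd_page()-based logic:
	 */
	struct folio *folio = vm_normal_folio_pmd(vma, addr, orig_pmd);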

This patch (of 13):

This function is confusing - it overloads the term 'special' yet again,
and it checks for DAX even though, in many cases, callers explicitly
exclude DAX before invoking the predicate.

It also unnecessarily checks vma->vm_file - a file has to be present
for a driver to have set VMA_MIXEDMAP_BIT or VMA_PFNMAP_BIT, so the
flag check alone suffices.

In fact, a far simpler form of this is to reverse the DAX predicate and
return false if DAX is set.

This makes sense from the point of view of 'special' as in
vm_normal_page(): unlike the other 'special' cases, DAX does
potentially have retrievable folios.
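
Concretely, as the mm/huge_memory.c hunks below show, call sites that
previously had to exclude DAX by hand can drop that check once the
predicate itself returns false for DAX:

	/* Before: */
	if (!vma_is_dax(vma) && vma_is_special_huge(vma)) { ... }

	/* After: */
	if (vma_is_special_huge(vma)) { ... }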

Also, there's no need to have this in mm.h, so move it to
huge_memory.c.

No functional change intended.

Link: https://lkml.kernel.org/r/cover.1774029655.git.ljs@kernel.org
Link: https://lkml.kernel.org/r/d2b65883dc4895f197c4b4a69fbf27a063463412.1774029655.git.ljs@kernel.org
Link: https://lore.kernel.org/all/6b3d7ad7-49e1-407a-903d-3103704160d8@lucifer.local/
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
include/linux/mm.h
mm/huge_memory.c

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index bd7f0e1d80945f092f1c048a0c36c7e526099398..61fda1672b292d41dfe5ae136d9801ce1f490e6d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -83,7 +83,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
  * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to
  * it.  Same to PFNMAPs where there's neither page* nor pagecache.
  */
-#define THP_ORDERS_ALL_SPECIAL         \
+#define THP_ORDERS_ALL_SPECIAL_DAX     \
        (BIT(PMD_ORDER) | BIT(PUD_ORDER))
 #define THP_ORDERS_ALL_FILE_DEFAULT    \
        ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))
@@ -92,7 +92,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
  * Mask of all large folio orders supported for THP.
  */
 #define THP_ORDERS_ALL \
-       (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT)
+       (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL_DAX | THP_ORDERS_ALL_FILE_DEFAULT)
 
 enum tva_type {
        TVA_SMAPS,              /* Exposing "THPeligible:" in smaps. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61dff7f035541eb77cb14b5e9a913df589bb89eb..8260e28205e905b74821bb299203b4a4d4f1754b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5068,22 +5068,6 @@ long copy_folio_from_user(struct folio *dst_folio,
                           const void __user *usr_src,
                           bool allow_pagefault);
 
-/**
- * vma_is_special_huge - Are transhuge page-table entries considered special?
- * @vma: Pointer to the struct vm_area_struct to consider
- *
- * Whether transhuge page-table entries are considered "special" following
- * the definition in vm_normal_page().
- *
- * Return: true if transhuge page-table entries should be considered special,
- * false otherwise.
- */
-static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
-{
-       return vma_is_dax(vma) || (vma->vm_file &&
-                                  (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
-}
-
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if MAX_NUMNODES > 1
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c1a7cf7b209bd7f0511a56c9f40696175a0d1dc..db390b0098d9e2ae4471db0fea16e4a0b5706cf2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -100,6 +100,14 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
        return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
 }
 
+/* If this returns true, we are unable to access the VMA's folios. */
+static bool vma_is_special_huge(const struct vm_area_struct *vma)
+{
+       if (vma_is_dax(vma))
+               return false;
+       return vma_test_any(vma, VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT);
+}
+
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
                                         vm_flags_t vm_flags,
                                         enum tva_type type,
@@ -113,8 +121,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
        /* Check the intersection of requested and supported orders. */
        if (vma_is_anonymous(vma))
                supported_orders = THP_ORDERS_ALL_ANON;
-       else if (vma_is_special_huge(vma))
-               supported_orders = THP_ORDERS_ALL_SPECIAL;
+       else if (vma_is_dax(vma) || vma_is_special_huge(vma))
+               supported_orders = THP_ORDERS_ALL_SPECIAL_DAX;
        else
                supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;
 
@@ -2415,7 +2423,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                                tlb->fullmm);
        arch_check_zapped_pmd(vma, orig_pmd);
        tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-       if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+       if (vma_is_special_huge(vma)) {
                if (arch_needs_pgtable_deposit())
                        zap_deposited_table(tlb->mm, pmd);
                spin_unlock(ptl);
@@ -2917,7 +2925,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
        orig_pud = pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
        arch_check_zapped_pud(vma, orig_pud);
        tlb_remove_pud_tlb_entry(tlb, pud, addr);
-       if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+       if (vma_is_special_huge(vma)) {
                spin_unlock(ptl);
                /* No zero page support yet */
        } else {
@@ -3068,7 +3076,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 */
                if (arch_needs_pgtable_deposit())
                        zap_deposited_table(mm, pmd);
-               if (!vma_is_dax(vma) && vma_is_special_huge(vma))
+               if (vma_is_special_huge(vma))
                        return;
                if (unlikely(pmd_is_migration_entry(old_pmd))) {
                        const softleaf_t old_entry = softleaf_from_pmd(old_pmd);
@@ -4629,8 +4637,16 @@ next:
 
 static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 {
-       return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) ||
-                   is_vm_hugetlb_page(vma);
+       if (vma_is_dax(vma))
+               return true;
+       if (vma_is_special_huge(vma))
+               return true;
+       if (vma_test(vma, VMA_IO_BIT))
+               return true;
+       if (is_vm_hugetlb_page(vma))
+               return true;
+
+       return false;
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,