mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED
author    David Hildenbrand <david@redhat.com>
          Fri, 15 Aug 2025 13:54:55 +0000 (14:54 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
          Sat, 13 Sep 2025 23:55:05 +0000 (16:55 -0700)
Let's allow MADV_COLLAPSE to succeed on areas that have neither
VM_HUGEPAGE nor VM_NOHUGEPAGE when THPs are disabled unless explicitly
advised (PR_THP_DISABLE_EXCEPT_ADVISED).

MADV_COLLAPSE is clear advice that we want to collapse.

Note that we still respect the VM_NOHUGEPAGE flag, just as MADV_COLLAPSE
always has. Consequently, with PR_THP_DISABLE_EXCEPT_ADVISED set,
MADV_COLLAPSE is now refused only on VM_NOHUGEPAGE areas, including for
shmem.
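
For illustration (not part of the patch), a minimal userspace sketch of the
resulting behaviour. The exact prctl() calling convention for
PR_THP_DISABLE_EXCEPT_ADVISED is an assumption based on this series, the
constants are guarded for older headers, and MADV_COLLAPSE needs Linux 6.1
or newer:

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_THP_DISABLE
    #define PR_SET_THP_DISABLE 41                    /* value from the prctl.h hunk */
    #endif
    #ifndef PR_THP_DISABLE_EXCEPT_ADVISED
    #define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)   /* value from the prctl.h hunk */
    #endif
    #ifndef MADV_COLLAPSE
    #define MADV_COLLAPSE 25                         /* uapi value, Linux 6.1+ */
    #endif

    int main(void)
    {
            const size_t pmd_size = 2UL << 20;       /* 2 MiB PMD on x86-64 */

            /*
             * ASSUMPTION: the flag is passed as arg3 next to arg2 == 1; check
             * the merged series / man page for the final calling convention.
             */
            if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, 0, 0))
                    perror("prctl(PR_SET_THP_DISABLE)");

            /* Map two PMD units so a fully aligned 2 MiB region is guaranteed. */
            char *buf = mmap(NULL, 2 * pmd_size, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (buf == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            char *aligned = (char *)(((uintptr_t)buf + pmd_size - 1) & ~(pmd_size - 1));

            memset(aligned, 1, pmd_size);            /* populate with small pages first */

            /*
             * No MADV_HUGEPAGE was given, yet with this patch the collapse is
             * honoured: MADV_COLLAPSE itself counts as explicit advice. Without
             * the patch, the EXCEPT_ADVISED policy made this fail.
             */
            if (madvise(aligned, pmd_size, MADV_COLLAPSE))
                    perror("madvise(MADV_COLLAPSE)");
            else
                    puts("MADV_COLLAPSE succeeded");

            munmap(buf, 2 * pmd_size);
            return 0;
    }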

Link: https://lkml.kernel.org/r/20250815135549.130506-4-usamaarif642@gmail.com
Co-developed-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yafang <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
include/uapi/linux/prctl.h
mm/huge_memory.c
mm/memory.c
mm/shmem.c

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ea0b9771fae6abdb612aa78dda047f78472d9f..1ac0d06fb3c1d3954225c920f89013774bbf4a86 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -329,7 +329,7 @@ struct thpsize {
  * through madvise or prctl.
  */
 static inline bool vma_thp_disabled(struct vm_area_struct *vma,
-               vm_flags_t vm_flags)
+               vm_flags_t vm_flags, bool forced_collapse)
 {
        /* Are THPs disabled for this VMA? */
        if (vm_flags & VM_NOHUGEPAGE)
@@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma,
         */
        if (vm_flags & VM_HUGEPAGE)
                return false;
+       /*
+        * Forcing a collapse (e.g., madv_collapse) is clear advice to
+        * use THPs.
+        */
+       if (forced_collapse)
+               return false;
        return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm);
 }
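
For orientation, a simplified model (not kernel code) of the precedence this
hunk establishes: plain booleans stand in for the VMA and mm flags, and the
context elided between the two hunks above is not modelled.

    static bool thp_disabled_model(bool vm_nohugepage, bool vm_hugepage,
                                   bool forced_collapse,
                                   bool disable_except_advised)
    {
            if (vm_nohugepage)              /* explicit opt-out always wins */
                    return true;
            if (vm_hugepage)                /* explicit opt-in (MADV_HUGEPAGE) */
                    return false;
            if (forced_collapse)            /* MADV_COLLAPSE now counts as advice too */
                    return false;
            return disable_except_advised;  /* otherwise the prctl() policy decides */
    }

The new forced_collapse check sits below the explicit opt-in and opt-out
checks, so VM_NOHUGEPAGE still refuses the collapse.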
 
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 150b6deebfb1e9640dcd9ac8d7f10dfa0745f2a6..51c4e8c82b1e98899c00d2e4609ca35432176533 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,7 +185,7 @@ struct prctl_mm_map {
 #define PR_SET_THP_DISABLE     41
 /*
  * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
- * VM_HUGEPAGE).
+ * VM_HUGEPAGE, MADV_COLLAPSE).
  */
 # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
 #define PR_GET_THP_DISABLE     42
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 899d9ac86ecd4c80164a2f220345da16279965c7..d89992b65acc982909ea9af07ce1c1655fb3662f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 {
        const bool smaps = type == TVA_SMAPS;
        const bool in_pf = type == TVA_PAGEFAULT;
-       const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE;
+       const bool forced_collapse = type == TVA_FORCED_COLLAPSE;
+       const bool enforce_sysfs = !forced_collapse;
        unsigned long supported_orders;
 
        /* Check the intersection of requested and supported orders. */
@@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
        if (!vma->vm_mm)                /* vdso */
                return 0;
 
-       if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags))
+       if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse))
                return 0;
 
        /* khugepaged doesn't collapse DAX vma, but page fault is fine. */
diff --git a/mm/memory.c b/mm/memory.c
index 7b1e8f137fa3fb1c577baab697831fd4e3a94d92..d9de6c056179483c73c2279e3f54a7cdf5e66feb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5332,9 +5332,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
         * It is too late to allocate a small folio, we already have a large
         * folio in the pagecache: especially s390 KVM cannot tolerate any
         * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any
-        * PMD mappings if THPs are disabled.
+        * PMD mappings if THPs are disabled. As we already have a THP,
+        * behave as if we are forcing a collapse.
         */
-       if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))
+       if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags,
+                                                    /* forced_collapse=*/ true))
                return ret;
 
        if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
diff --git a/mm/shmem.c b/mm/shmem.c
index e2c76a30802b6ee3b8cf05a33943fb5583fdc360..d945de3a7f0e7a111b4f11c421a97449e3f50028 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1817,7 +1817,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
        vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
        unsigned int global_orders;
 
-       if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
+       if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
                return 0;
 
        global_orders = shmem_huge_global_enabled(inode, index, write_end,