From: Greg Kroah-Hartman Date: Thu, 25 Jun 2026 11:29:47 +0000 (+0100) Subject: 6.18-stable patches X-Git-Tag: v6.18.37~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b41808d38cf9322dfc0195d535c8582ddb8ae4ff;p=thirdparty%2Fkernel%2Fstable-queue.git 6.18-stable patches added patches: mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch mm-implement-sticky-vma-flags.patch mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch mm-propagate-vm_softdirty-on-merge.patch mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch mm-update-vma_modify_flags-to-handle-residual-flags-document.patch testing-selftests-mm-add-soft-dirty-merge-self-test.patch --- diff --git a/queue-6.18/mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch b/queue-6.18/mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch new file mode 100644 index 0000000000..ba8c9604b4 --- /dev/null +++ b/queue-6.18/mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch @@ -0,0 +1,111 @@ +From stable+bounces-247749-greg=kroah.com@vger.kernel.org Fri May 15 14:05:33 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:12 +0300 +Subject: mm: add atomic VMA flags and set VM_MAYBE_GUARD as such +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Pedro Falcato , Vlastimil Babka , "David Hildenbrand (Red Hat)" , Lance Yang , Baolin Wang , Barry Song , Dev Jain , Jann Horn , Jonathan Corbet , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-4-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 568822502383acd57d7cc1c72ee43932c45a9524 upstream. 
+ +This patch adds the ability to atomically set VMA flags with only the mmap +read/VMA read lock held. + +As this could be hugely problematic for VMA flags in general given that +all other accesses are non-atomic and serialised by the mmap/VMA locks, we +implement this with a strict allow-list - that is, only designated flags +are allowed to do this. + +We make VM_MAYBE_GUARD one of these flags. + +Link: https://lkml.kernel.org/r/97e57abed09f2663077ed7a36fb8206e243171a9.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Acked-by: David Hildenbrand (Red Hat) +Reviewed-by: Lance Yang +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 44 insertions(+) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -501,6 +501,9 @@ extern unsigned int kobjsize(const void + /* This mask represents all the VMA flag bits used by mlock */ + #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) + ++/* These flags can be updated atomically via VMA/mmap read lock. 
*/ ++#define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD ++ + /* Arch-specific flags to clear when updating VM flags on protection change */ + #ifndef VM_ARCH_CLEAR + # define VM_ARCH_CLEAR VM_NONE +@@ -843,6 +846,47 @@ static inline void vm_flags_mod(struct v + __vm_flags_mod(vma, set, clear); + } + ++static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma, ++ int bit) ++{ ++ const vm_flags_t mask = BIT(bit); ++ ++ /* Only specific flags are permitted */ ++ if (WARN_ON_ONCE(!(mask & VM_ATOMIC_SET_ALLOWED))) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific ++ * valid flags are allowed to do this. ++ */ ++static inline void vma_flag_set_atomic(struct vm_area_struct *vma, int bit) ++{ ++ /* mmap read lock/VMA read lock must be held. */ ++ if (!rwsem_is_locked(&vma->vm_mm->mmap_lock)) ++ vma_assert_locked(vma); ++ ++ if (__vma_flag_atomic_valid(vma, bit)) ++ set_bit(bit, &ACCESS_PRIVATE(vma, __vm_flags)); ++} ++ ++/* ++ * Test for VMA flag atomically. Requires no locks. Only specific valid flags ++ * are allowed to do this. ++ * ++ * This is necessarily racey, so callers must ensure that serialisation is ++ * achieved through some other means, or that races are permissible. 
++ */ ++static inline bool vma_flag_test_atomic(struct vm_area_struct *vma, int bit) ++{ ++ if (__vma_flag_atomic_valid(vma, bit)) ++ return test_bit(bit, &vma->vm_flags); ++ ++ return false; ++} ++ + static inline void vma_set_anonymous(struct vm_area_struct *vma) + { + vma->vm_ops = NULL; diff --git a/queue-6.18/mm-implement-sticky-vma-flags.patch b/queue-6.18/mm-implement-sticky-vma-flags.patch new file mode 100644 index 0000000000..96777d4fcc --- /dev/null +++ b/queue-6.18/mm-implement-sticky-vma-flags.patch @@ -0,0 +1,276 @@ +From stable+bounces-247751-greg=kroah.com@vger.kernel.org Fri May 15 14:05:45 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:14 +0300 +Subject: mm: implement sticky VMA flags +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Pedro Falcato , Vlastimil Babka , Baolin Wang , Barry Song , "David Hildenbrand (Red Hat)" , Dev Jain , Jann Horn , Jonathan Corbet , Lance Yang , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-6-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 64212ba02e66e705cabce188453ba4e61e9d7325 upstream. + +It is useful to be able to designate that certain flags are 'sticky', that +is, if two VMAs are merged one with a flag of this nature and one without, +the merged VMA sets this flag. + +As a result we ignore these flags for the purposes of determining VMA flag +differences between VMAs being considered for merge. + +This patch therefore updates the VMA merge logic to perform this action, +with flags possessing this property being described in the VM_STICKY +bitmap. + +Those flags which ought to be ignored for the purposes of VMA merge are +described in the VM_IGNORE_MERGE bitmap, which the VMA merge logic is also +updated to use. 
+ +As part of this change we place VM_SOFTDIRTY in VM_IGNORE_MERGE as it +already had this behaviour, alongside VM_STICKY as sticky flags by +implication must not disallow merge. + +Ultimately it seems that we should make VM_SOFTDIRTY a sticky flag in its +own right, but this change is out of scope for this series. + +The only sticky flag designated as such is VM_MAYBE_GUARD, so as a result +of this change, once the VMA flag is set upon guard region installation, +VMAs with guard ranges will now not have their merge behaviour impacted as +a result and can be freely merged with other VMAs without VM_MAYBE_GUARD +set. + +Also update the comments for vma_modify_flags() to directly reference +sticky flags now we have established the concept. + +We also update the VMA userland tests to account for the changes. + +Link: https://lkml.kernel.org/r/22ad5269f7669d62afb42ce0c79bad70b994c58d.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: David Hildenbrand (Red Hat) +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Lance Yang +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 28 ++++++++++++++++++++++++++++ + mm/vma.c | 31 +++++++++++++++++-------------- + mm/vma.h | 10 ++++------ + tools/testing/vma/vma_internal.h | 28 ++++++++++++++++++++++++++++ + 4 files changed, 77 insertions(+), 20 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -511,6 +511,34 @@ extern unsigned int kobjsize(const void + #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) + + /* ++ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA ++ * possesses it 
but the other does not, the merged VMA should nonetheless have ++ * applied to it: ++ * ++ * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that ++ * mapped page tables may contain metadata not described by the ++ * VMA and thus any merged VMA may also contain this metadata, ++ * and thus we must make this flag sticky. ++ */ ++#define VM_STICKY VM_MAYBE_GUARD ++ ++/* ++ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one ++ * of these flags and the other not does not preclude a merge. ++ * ++ * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but ++ * dirty bit -- the caller should mark merged VMA as dirty. If ++ * dirty bit won't be excluded from comparison, we increase ++ * pressure on the memory system forcing the kernel to generate ++ * new VMAs when old one could be extended instead. ++ * ++ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are ++ * 'sticky'. If any sticky flags exist in either VMA, we simply ++ * set all of them on the merged VMA. ++ */ ++#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) ++ ++/* + * mapping from the currently active vm_flags protection bits (the + * low four bits) to a page protection mask.. + */ +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -82,15 +82,7 @@ static inline bool is_mergeable_vma(stru + + if (!mpol_equal(vmg->policy, vma_policy(vma))) + return false; +- /* +- * VM_SOFTDIRTY should not prevent from VMA merging, if we +- * match the flags but dirty bit -- the caller should mark +- * merged VMA as dirty. If dirty bit won't be excluded from +- * comparison, we increase pressure on the memory system forcing +- * the kernel to generate new VMAs when old one could be +- * extended instead. 
+- */ +- if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_SOFTDIRTY) ++ if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE) + return false; + if (vma->vm_file != vmg->file) + return false; +@@ -810,6 +802,7 @@ static bool can_merge_remove_vma(struct + static __must_check struct vm_area_struct *vma_merge_existing_range( + struct vma_merge_struct *vmg) + { ++ vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY; + struct vm_area_struct *middle = vmg->middle; + struct vm_area_struct *prev = vmg->prev; + struct vm_area_struct *next; +@@ -904,11 +897,13 @@ static __must_check struct vm_area_struc + if (merge_right) { + vma_start_write(next); + vmg->target = next; ++ sticky_flags |= (next->vm_flags & VM_STICKY); + } + + if (merge_left) { + vma_start_write(prev); + vmg->target = prev; ++ sticky_flags |= (prev->vm_flags & VM_STICKY); + } + + if (merge_both) { +@@ -978,6 +973,7 @@ static __must_check struct vm_area_struc + if (err || commit_merge(vmg)) + goto abort; + ++ vm_flags_set(vmg->target, sticky_flags); + khugepaged_enter_vma(vmg->target, vmg->vm_flags); + vmg->state = VMA_MERGE_SUCCESS; + return vmg->target; +@@ -1156,14 +1152,20 @@ int vma_expand(struct vma_merge_struct * + struct vm_area_struct *target = vmg->target; + struct vm_area_struct *next = vmg->next; + int ret = 0; ++ vm_flags_t sticky_flags; ++ ++ sticky_flags = vmg->vm_flags & VM_STICKY; ++ sticky_flags |= target->vm_flags & VM_STICKY; + + VM_WARN_ON_VMG(!target, vmg); + + mmap_assert_write_locked(vmg->mm); + vma_start_write(target); + +- if (next && target != next && vmg->end == next->vm_end) ++ if (next && target != next && vmg->end == next->vm_end) { ++ sticky_flags |= next->vm_flags & VM_STICKY; + remove_next = true; ++ } + + /* We must have a target. 
*/ + VM_WARN_ON_VMG(!target, vmg); +@@ -1197,6 +1199,7 @@ int vma_expand(struct vma_merge_struct * + if (commit_merge(vmg)) + goto nomem; + ++ vm_flags_set(target, sticky_flags); + return 0; + + nomem: +@@ -1692,9 +1695,9 @@ struct vm_area_struct *vma_modify_flags( + return ret; + + /* +- * For a merge to succeed, the flags must match those requested. For +- * flags which do not obey typical merge rules (i.e. do not need to +- * match), we must let the caller know about them. ++ * For a merge to succeed, the flags must match those ++ * requested. However, sticky flags may have been retained, so propagate ++ * them to the caller. + */ + if (vmg.state == VMA_MERGE_SUCCESS) + *vm_flags_ptr = ret->vm_flags; +@@ -1959,7 +1962,7 @@ static int anon_vma_compatible(struct vm + return a->vm_end == b->vm_start && + mpol_equal(vma_policy(a), vma_policy(b)) && + a->vm_file == b->vm_file && +- !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && ++ !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) && + b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); + } + +--- a/mm/vma.h ++++ b/mm/vma.h +@@ -276,17 +276,15 @@ void unmap_region(struct ma_state *mas, + * @start: The start of the range to update. May be offset within @vma. + * @end: The exclusive end of the range to update, may be offset within @vma. + * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is +- * about to be set to. On merge, this will be updated to include any additional +- * flags which remain in place. ++ * about to be set to. On merge, this will be updated to include sticky flags. + * + * IMPORTANT: The actual modification being requested here is NOT applied, + * rather the VMA is perhaps split, perhaps merged to accommodate the change, + * and the caller is expected to perform the actual modification. + * +- * In order to account for VMA flags which may persist (e.g. 
soft-dirty), the +- * @vm_flags_ptr parameter points to the requested flags which are then updated +- * so the caller, should they overwrite any existing flags, correctly retains +- * these. ++ * In order to account for sticky VMA flags, the @vm_flags_ptr parameter points ++ * to the requested flags which are then updated so the caller, should they ++ * overwrite any existing flags, correctly retains these. + * + * Returns: A VMA which contains the range @start to @end ready to have its + * flags altered to *@vm_flags. +--- a/tools/testing/vma/vma_internal.h ++++ b/tools/testing/vma/vma_internal.h +@@ -117,6 +117,34 @@ extern unsigned long dac_mmap_min_addr; + #define VM_SEALED VM_NONE + #endif + ++/* ++ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA ++ * possesses it but the other does not, the merged VMA should nonetheless have ++ * applied to it: ++ * ++ * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that ++ * mapped page tables may contain metadata not described by the ++ * VMA and thus any merged VMA may also contain this metadata, ++ * and thus we must make this flag sticky. ++ */ ++#define VM_STICKY VM_MAYBE_GUARD ++ ++/* ++ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one ++ * of these flags and the other not does not preclude a merge. ++ * ++ * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but ++ * dirty bit -- the caller should mark merged VMA as dirty. If ++ * dirty bit won't be excluded from comparison, we increase ++ * pressure on the memory system forcing the kernel to generate ++ * new VMAs when old one could be extended instead. ++ * ++ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are ++ * 'sticky'. If any sticky flags exist in either VMA, we simply ++ * set all of them on the merged VMA. 
++ */ ++#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) ++ + #define FIRST_USER_ADDRESS 0UL + #define USER_PGTABLES_CEILING 0UL + diff --git a/queue-6.18/mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch b/queue-6.18/mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch new file mode 100644 index 0000000000..1360536ff4 --- /dev/null +++ b/queue-6.18/mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch @@ -0,0 +1,166 @@ +From stable+bounces-247752-greg=kroah.com@vger.kernel.org Fri May 15 14:05:45 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:15 +0300 +Subject: mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Pedro Falcato , Vlastimil Babka , "David Hildenbrand (Red Hat)" , Baolin Wang , Barry Song , Dev Jain , Jann Horn , Jonathan Corbet , Lance Yang , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-7-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit ab04b530e7e8bd5cf9fb0c1ad20e0deee8f569ec upstream. + +Gather all the VMA flags whose presence implies that page tables must be +copied on fork into a single bitmap - VM_COPY_ON_FORK - and use this +rather than specifying individual flags in vma_needs_copy(). + +We also add VM_MAYBE_GUARD to this list, as it being set on a VMA implies +that there may be metadata contained in the page tables (that is - guard +markers) which would will not and cannot be propagated upon fork. + +This was already being done manually previously in vma_needs_copy(), but +this makes it very explicit, alongside VM_PFNMAP, VM_MIXEDMAP and +VM_UFFD_WP all of which imply the same. 
+ +Note that VM_STICKY flags ought generally to be marked VM_COPY_ON_FORK too +- because equally a flag being VM_STICKY indicates that the VMA contains +metadata that is not propagated by being faulted in - i.e. that the VMA +metadata does not fully describe the VMA alone, and thus we must propagate +whatever metadata there is on a fork. + +However, for maximum flexibility, we do not make this necessarily the case +here. + +Link: https://lkml.kernel.org/r/5d41b24e7bc622cda0af92b6d558d7f4c0d1bc8c.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Acked-by: David Hildenbrand (Red Hat) +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Lance Yang +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 26 ++++++++++++++++++++++++++ + mm/memory.c | 18 ++++-------------- + tools/testing/vma/vma_internal.h | 26 ++++++++++++++++++++++++++ + 3 files changed, 56 insertions(+), 14 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -539,6 +539,32 @@ extern unsigned int kobjsize(const void + #define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) + + /* ++ * Flags which should result in page tables being copied on fork. These are ++ * flags which indicate that the VMA maps page tables which cannot be ++ * reconsistuted upon page fault, so necessitate page table copying upon ++ * ++ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be ++ * reasonably reconstructed on page fault. ++ * ++ * VM_UFFD_WP - Encodes metadata about an installed uffd ++ * write protect handler, which cannot be ++ * reconstructed on page fault. 
++ * ++ * We always copy pgtables when dst_vma has uffd-wp ++ * enabled even if it's file-backed ++ * (e.g. shmem). Because when uffd-wp is enabled, ++ * pgtable contains uffd-wp protection information, ++ * that's something we can't retrieve from page cache, ++ * and skip copying will lose those info. ++ * ++ * VM_MAYBE_GUARD - Could contain page guard region markers which ++ * by design are a property of the page tables ++ * only and thus cannot be reconstructed on page ++ * fault. ++ */ ++#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) ++ ++/* + * mapping from the currently active vm_flags protection bits (the + * low four bits) to a page protection mask.. + */ +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1479,25 +1479,15 @@ copy_p4d_range(struct vm_area_struct *ds + static bool + vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) + { ++ if (src_vma->vm_flags & VM_COPY_ON_FORK) ++ return true; + /* +- * Always copy pgtables when dst_vma has uffd-wp enabled even if it's +- * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable +- * contains uffd-wp protection information, that's something we can't +- * retrieve from page cache, and skip copying will lose those info. ++ * The presence of an anon_vma indicates an anonymous VMA has page ++ * tables which naturally cannot be reconstituted on page fault. + */ +- if (userfaultfd_wp(dst_vma)) +- return true; +- +- if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) +- return true; +- + if (src_vma->anon_vma) + return true; + +- /* Guard regions have modified page tables that require copying. */ +- if (src_vma->vm_flags & VM_MAYBE_GUARD) +- return true; +- + /* + * Don't copy ptes where a page fault will fill them correctly. 
Fork + * becomes much lighter when there are big shared or private readonly +--- a/tools/testing/vma/vma_internal.h ++++ b/tools/testing/vma/vma_internal.h +@@ -145,6 +145,32 @@ extern unsigned long dac_mmap_min_addr; + */ + #define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) + ++/* ++ * Flags which should result in page tables being copied on fork. These are ++ * flags which indicate that the VMA maps page tables which cannot be ++ * reconsistuted upon page fault, so necessitate page table copying upon ++ * ++ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be ++ * reasonably reconstructed on page fault. ++ * ++ * VM_UFFD_WP - Encodes metadata about an installed uffd ++ * write protect handler, which cannot be ++ * reconstructed on page fault. ++ * ++ * We always copy pgtables when dst_vma has uffd-wp ++ * enabled even if it's file-backed ++ * (e.g. shmem). Because when uffd-wp is enabled, ++ * pgtable contains uffd-wp protection information, ++ * that's something we can't retrieve from page cache, ++ * and skip copying will lose those info. ++ * ++ * VM_MAYBE_GUARD - Could contain page guard region markers which ++ * by design are a property of the page tables ++ * only and thus cannot be reconstructed on page ++ * fault. 
++ */ ++#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD) ++ + #define FIRST_USER_ADDRESS 0UL + #define USER_PGTABLES_CEILING 0UL + diff --git a/queue-6.18/mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch b/queue-6.18/mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch new file mode 100644 index 0000000000..3fb657aca6 --- /dev/null +++ b/queue-6.18/mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch @@ -0,0 +1,220 @@ +From stable+bounces-247748-greg=kroah.com@vger.kernel.org Fri May 15 14:05:27 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:11 +0300 +Subject: mm: introduce VM_MAYBE_GUARD and make visible in /proc/$pid/smaps +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Pedro Falcato , Vlastimil Babka , "David Hildenbrand (Red Hat)" , Lance Yang , Baolin Wang , Barry Song , Dev Jain , Jann Horn , Jonathan Corbet , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-3-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 5dba5cc2e0ffa76f2f6c8922a04469dc9602c396 upstream. + +Patch series "introduce VM_MAYBE_GUARD and make it sticky", v4. + +Currently, guard regions are not visible to users except through +/proc/$pid/pagemap, with no explicit visibility at the VMA level. + +This makes the feature less useful, as it isn't entirely apparent which +VMAs may have these entries present, especially when performing actions +which walk through memory regions such as those performed by CRIU. + +This series addresses this issue by introducing the VM_MAYBE_GUARD flag +which fulfils this role, updating the smaps logic to display an entry for +these. 
+ +The semantics of this flag are that a guard region MAY be present if set +(we cannot be sure, as we can't efficiently track whether an +MADV_GUARD_REMOVE finally removes all the guard regions in a VMA) - but if +not set the VMA definitely does NOT have any guard regions present. + +It's problematic to establish this flag without further action, because +that means that VMAs with guard regions in them become non-mergeable with +adjacent VMAs for no especially good reason. + +To work around this, this series also introduces the concept of 'sticky' +VMA flags - that is flags which: + +a. if set in one VMA and not in another still permit those VMAs to be + merged (if otherwise compatible). + +b. When they are merged, the resultant VMA must have the flag set. + +The VMA logic is updated to propagate these flags correctly. + +Additionally, VM_MAYBE_GUARD being an explicit VMA flag allows us to solve +an issue with file-backed guard regions - previously these established an +anon_vma object for file-backed mappings solely to have vma_needs_copy() +correctly propagate guard region mappings to child processes. + +We introduce a new flag alias VM_COPY_ON_FORK (which currently only +specifies VM_MAYBE_GUARD) and update vma_needs_copy() to check explicitly +for this flag and to copy page tables if it is present, which resolves +this issue. + +Additionally, we add the ability for allow-listed VMA flags to be +atomically writable with only mmap/VMA read locks held. + +The only flag we allow so far is VM_MAYBE_GUARD, which we carefully ensure +does not cause any races by being allowed to do so. + +This allows us to maintain guard region installation as a read-locked +operation and not endure the overhead of obtaining a write lock here. + +Finally we introduce extensive VMA userland tests to assert that the +sticky VMA logic behaves correctly as well as guard region self tests to +assert that smaps visibility is correctly implemented. 
+ +This patch (of 9): + +Currently, if a user needs to determine if guard regions are present in a +range, they have to scan all VMAs (or have knowledge of which ones might +have guard regions). + +Since commit 8e2f2aeb8b48 ("fs/proc/task_mmu: add guard region bit to +pagemap") and the related commit a516403787e0 ("fs/proc: extend the +PAGEMAP_SCAN ioctl to report guard regions"), users can use either +/proc/$pid/pagemap or the PAGEMAP_SCAN functionality to perform this +operation at a virtual address level. + +This is not ideal, and it gives no visibility at a /proc/$pid/smaps level +that guard regions exist in ranges. + +This patch remedies the situation by establishing a new VMA flag, +VM_MAYBE_GUARD, to indicate that a VMA may contain guard regions (it is +uncertain because we cannot reasonably determine whether a +MADV_GUARD_REMOVE call has removed all of the guard regions in a VMA, and +additionally VMAs may change across merge/split). + +We utilise 0x800 for this flag which makes it available to 32-bit +architectures also, a flag that was previously used by VM_DENYWRITE, which +was removed in commit 8d0920bde5eb ("mm: remove VM_DENYWRITE") and hasn't +been reused yet. + +We also update the smaps logic and documentation to identify these VMAs. + +Another major use of this functionality is that we can use it to identify +that we ought to copy page tables on fork. + +We do not actually implement usage of this flag in mm/madvise.c yet as we +need to allow some VMA flags to be applied atomically under mmap/VMA read +lock in order to avoid the need to acquire a write lock for this purpose. 
+ +Link: https://lkml.kernel.org/r/cover.1763460113.git.ljs@kernel.org +Link: https://lkml.kernel.org/r/cf8ef821eba29b6c5b5e138fffe95d6dcabdedb9.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Acked-by: David Hildenbrand (Red Hat) +Reviewed-by: Lance Yang +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/filesystems/proc.rst | 5 +++-- + fs/proc/task_mmu.c | 1 + + include/linux/mm.h | 3 +++ + include/trace/events/mmflags.h | 1 + + mm/memory.c | 4 ++++ + tools/testing/vma/vma_internal.h | 1 + + 6 files changed, 13 insertions(+), 2 deletions(-) + +--- a/Documentation/filesystems/proc.rst ++++ b/Documentation/filesystems/proc.rst +@@ -553,7 +553,7 @@ otherwise. + kernel flags associated with the particular virtual memory area in two letter + encoded manner. The codes are the following: + +- == ======================================= ++ == ============================================================= + rd readable + wr writeable + ex executable +@@ -591,7 +591,8 @@ encoded manner. The codes are the follow + sl sealed + lf lock on fault pages + dp always lazily freeable mapping +- == ======================================= ++ gu maybe contains guard regions (if not set, definitely doesn't) ++ == ============================================================= + + Note that there is no guarantee that every flag and associated mnemonic will + be present in all further kernel releases. 
Things get changed, the flags may +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -1159,6 +1159,7 @@ static void show_smap_vma_flags(struct s + [ilog2(VM_MAYSHARE)] = "ms", + [ilog2(VM_GROWSDOWN)] = "gd", + [ilog2(VM_PFNMAP)] = "pf", ++ [ilog2(VM_MAYBE_GUARD)] = "gu", + [ilog2(VM_LOCKED)] = "lo", + [ilog2(VM_IO)] = "io", + [ilog2(VM_SEQ_READ)] = "sr", +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -269,6 +269,8 @@ extern struct rw_semaphore nommu_region_ + extern unsigned int kobjsize(const void *objp); + #endif + ++#define VM_MAYBE_GUARD_BIT 11 ++ + /* + * vm_flags in vm_area_struct, see mm_types.h. + * When changing, update also include/trace/events/mmflags.h +@@ -294,6 +296,7 @@ extern unsigned int kobjsize(const void + #define VM_UFFD_MISSING 0 + #endif /* CONFIG_MMU */ + #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ ++#define VM_MAYBE_GUARD BIT(VM_MAYBE_GUARD_BIT) /* The VMA maybe contains guard regions. */ + #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ + + #define VM_LOCKED 0x00002000 +--- a/include/trace/events/mmflags.h ++++ b/include/trace/events/mmflags.h +@@ -213,6 +213,7 @@ IF_HAVE_PG_ARCH_3(arch_3) + {VM_UFFD_MISSING, "uffd_missing" }, \ + IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \ + {VM_PFNMAP, "pfnmap" }, \ ++ {VM_MAYBE_GUARD, "maybe_guard" }, \ + {VM_UFFD_WP, "uffd_wp" }, \ + {VM_LOCKED, "locked" }, \ + {VM_IO, "io" }, \ +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1494,6 +1494,10 @@ vma_needs_copy(struct vm_area_struct *ds + if (src_vma->anon_vma) + return true; + ++ /* Guard regions have modified page tables that require copying. */ ++ if (src_vma->vm_flags & VM_MAYBE_GUARD) ++ return true; ++ + /* + * Don't copy ptes where a page fault will fill them correctly. 
Fork + * becomes much lighter when there are big shared or private readonly +--- a/tools/testing/vma/vma_internal.h ++++ b/tools/testing/vma/vma_internal.h +@@ -56,6 +56,7 @@ extern unsigned long dac_mmap_min_addr; + #define VM_MAYEXEC 0x00000040 + #define VM_GROWSDOWN 0x00000100 + #define VM_PFNMAP 0x00000400 ++#define VM_MAYBE_GUARD 0x00000800 + #define VM_LOCKED 0x00002000 + #define VM_IO 0x00004000 + #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ diff --git a/queue-6.18/mm-propagate-vm_softdirty-on-merge.patch b/queue-6.18/mm-propagate-vm_softdirty-on-merge.patch new file mode 100644 index 0000000000..a341096671 --- /dev/null +++ b/queue-6.18/mm-propagate-vm_softdirty-on-merge.patch @@ -0,0 +1,148 @@ +From stable+bounces-247755-greg=kroah.com@vger.kernel.org Fri May 15 14:06:04 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:18 +0300 +Subject: mm: propagate VM_SOFTDIRTY on merge +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Vlastimil Babka , "David Hildenbrand (Red Hat)" , Pedro Falcato , Cyrill Gorcunov , Jann Horn , Liam Howlett , Michal Hocko , Mike Rapoport , Suren Baghdasaryan , Ahmed Elaidy +Message-ID: <20260515124218.151966-10-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 6707915e030a3258868355f989b80140c1a45bbe upstream. + +Patch series "make VM_SOFTDIRTY a sticky VMA flag", v2. + +Currently we set VM_SOFTDIRTY when a new mapping is set up (whether by +establishing a new VMA, or via merge) as implemented in __mmap_complete() +and do_brk_flags(). + +However, when performing a merge of existing mappings such as when +performing mprotect(), we may lose the VM_SOFTDIRTY flag. + +Now we have the concept of making VMA flags 'sticky', that is that they +both don't prevent merge and, importantly, are propagated to merged VMAs, +this seems a sensible alternative to the existing special-casing of +VM_SOFTDIRTY. 
+ +We additionally add a self-test that demonstrates that this logic behaves +as expected. + +This patch (of 2): + +Currently we set VM_SOFTDIRTY when a new mapping is set up (whether by +establishing a new VMA, or via merge) as implemented in __mmap_complete() +and do_brk_flags(). + +However, when performing a merge of existing mappings such as when +performing mprotect(), we may lose the VM_SOFTDIRTY flag. + +This is because currently we simply ignore VM_SOFTDIRTY for the purposes +of merge, so one VMA may possess the flag and another not, and whichever +happens to be the target VMA will be the one upon which the merge is +performed which may or may not have VM_SOFTDIRTY set. + +Now we have the concept of 'sticky' VMA flags, let's make VM_SOFTDIRTY one +which solves this issue. + +Additionally update VMA userland tests to propagate changes. + +[akpm@linux-foundation.org: update comments, per Lorenzo] + Link: https://lkml.kernel.org/r/0019e0b8-ee1e-4359-b5ee-94225cbe5588@lucifer.local +Link: https://lkml.kernel.org/r/cover.1763399675.git.ljs@kernel.org +Link: https://lkml.kernel.org/r/955478b5170715c895d1ef3b7f68e0cd77f76868.1763399675.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Suggested-by: Vlastimil Babka +Acked-by: David Hildenbrand (Red Hat) +Reviewed-by: Pedro Falcato +Acked-by: Andrey Vagin +Reviewed-by: Vlastimil Babka +Acked-by: Cyrill Gorcunov +Cc: Jann Horn +Cc: Liam Howlett +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Suren Baghdasaryan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Fixes: 34228d473efe ("mm: ignore VM_SOFTDIRTY on VMA merging") +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 15 +++++++-------- + tools/testing/vma/vma_internal.h | 18 ++++++------------ + 2 files changed, 13 insertions(+), 20 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -515,28 +515,27 @@ extern unsigned int kobjsize(const void + * possesses it but the other does not, the merged VMA should nonetheless have + * 
applied to it: + * ++ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its ++ * references cleared via /proc/$pid/clear_refs, any merged VMA ++ * should be considered soft-dirty also as it operates at a VMA ++ * granularity. ++ * + * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that + * mapped page tables may contain metadata not described by the + * VMA and thus any merged VMA may also contain this metadata, + * and thus we must make this flag sticky. + */ +-#define VM_STICKY VM_MAYBE_GUARD ++#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) + + /* + * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one + * of these flags and the other not does not preclude a merge. + * +- * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but +- * dirty bit -- the caller should mark merged VMA as dirty. If +- * dirty bit won't be excluded from comparison, we increase +- * pressure on the memory system forcing the kernel to generate +- * new VMAs when old one could be extended instead. +- * + * VM_STICKY - When merging VMAs, VMA flags must match, unless they are + * 'sticky'. If any sticky flags exist in either VMA, we simply + * set all of them on the merged VMA. + */ +-#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) ++#define VM_IGNORE_MERGE VM_STICKY + + /* + * Flags which should result in page tables being copied on fork. These are +--- a/tools/testing/vma/vma_internal.h ++++ b/tools/testing/vma/vma_internal.h +@@ -122,28 +122,22 @@ extern unsigned long dac_mmap_min_addr; + * possesses it but the other does not, the merged VMA should nonetheless have + * applied to it: + * +- * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that +- * mapped page tables may contain metadata not described by the +- * VMA and thus any merged VMA may also contain this metadata, +- * and thus we must make this flag sticky. 
++ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its ++ * references cleared via /proc/$pid/clear_refs, any merged VMA ++ * should be considered soft-dirty also as it operates at a VMA ++ * granularity. + */ +-#define VM_STICKY VM_MAYBE_GUARD ++#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD) + + /* + * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one + * of these flags and the other not does not preclude a merge. + * +- * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but +- * dirty bit -- the caller should mark merged VMA as dirty. If +- * dirty bit won't be excluded from comparison, we increase +- * pressure on the memory system forcing the kernel to generate +- * new VMAs when old one could be extended instead. +- * + * VM_STICKY - When merging VMAs, VMA flags must match, unless they are + * 'sticky'. If any sticky flags exist in either VMA, we simply + * set all of them on the merged VMA. + */ +-#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY) ++#define VM_IGNORE_MERGE VM_STICKY + + /* + * Flags which should result in page tables being copied on fork. 
These are diff --git a/queue-6.18/mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch b/queue-6.18/mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch new file mode 100644 index 0000000000..cbfffd5572 --- /dev/null +++ b/queue-6.18/mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch @@ -0,0 +1,250 @@ +From stable+bounces-247753-greg=kroah.com@vger.kernel.org Fri May 15 14:05:57 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:16 +0300 +Subject: mm: set the VM_MAYBE_GUARD flag on guard region install +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Vlastimil Babka , Baolin Wang , Barry Song , "David Hildenbrand (Red Hat)" , Dev Jain , Jann Horn , Jonathan Corbet , Lance Yang , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Pedro Falcato , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-8-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 49e14dabed7a294427588d4b315f57fbfcab9990 upstream. + +Now we have established the VM_MAYBE_GUARD flag and added the capacity to +set it atomically, do so upon MADV_GUARD_INSTALL. + +The places where this flag is used currently and matter are: + +* VMA merge - performed under mmap/VMA write lock, therefore excluding + racing writes. + +* /proc/$pid/smaps - can race the write, however this isn't meaningful + as the flag write is performed at the point of the guard region being + established, and thus an smaps reader can't reasonably expect to avoid + races. Due to atomicity, a reader will observe either the flag being + set or not. Therefore consistency will be maintained. + +In all other cases the flag being set is irrelevant and atomicity +guarantees other flags will be read correctly. 
+ +Note that non-atomic updates of unrelated flags do not cause an issue with +this flag being set atomically, as writes of other flags are performed +under mmap/VMA write lock, and these atomic writes are performed under +mmap/VMA read lock, which excludes the write, avoiding RMW races. + +Note that we do not encounter issues with KCSAN by adjusting this flag +atomically, as we are only updating a single bit in the flag bitmap and +therefore we do not need to annotate these changes. + +We intentionally set this flag in advance of actually updating the page +tables, to ensure that any racing atomic read of this flag will only +return false prior to page tables being updated, to allow for +serialisation via page table locks. + +Note that we set vma->anon_vma for anonymous mappings. This is because +the expectation for anonymous mappings is that an anon_vma is established +should they possess any page table mappings. This is also consistent with +what we were doing prior to this patch (unconditionally setting anon_vma +on guard region installation). + +We also need to update retract_page_tables() to ensure that madvise(..., +MADV_COLLAPSE) doesn't incorrectly collapse file-backed ranges containing +guard regions. + +This was previously guarded by anon_vma being set to catch MAP_PRIVATE +cases, but the introduction of VM_MAYBE_GUARD necessitates that we check +this flag instead. + +We utilise vma_flag_test_atomic() to do so - we first perform an +optimistic check, then after the PTE page table lock is held, we can check +again safely, as upon guard marker install the flag is set atomically +prior to the page table lock being taken to actually apply it. + +So if the initial check fails either: + +* Page table retraction acquires page table lock prior to VM_MAYBE_GUARD + being set - guard marker installation will be blocked until page table + retraction is complete. 
+ +OR: + +* Guard marker installation acquires page table lock after setting + VM_MAYBE_GUARD, which raced and didn't pick this up in the initial + optimistic check, blocking page table retraction until the guard regions + are installed - the second VM_MAYBE_GUARD check will prevent page table + retraction. + +Either way we're safe. + +We refactor the retraction checks into a single +file_backed_vma_is_retractable(), there doesn't seem to be any reason that +the checks were separated as before. + +Note that VM_MAYBE_GUARD being set atomically remains correct as +vma_needs_copy() is invoked with the mmap and VMA write locks held, +excluding any race with madvise_guard_install(). + +Link: https://lkml.kernel.org/r/e9e9ce95b6ac17497de7f60fc110c7dd9e489e8d.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Vlastimil Babka +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: David Hildenbrand (Red Hat) +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Lance Yang +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Pedro Falcato +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + mm/khugepaged.c | 71 +++++++++++++++++++++++++++++++++++++------------------- + mm/madvise.c | 22 +++++++++++------ + 2 files changed, 61 insertions(+), 32 deletions(-) + +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1715,6 +1715,43 @@ drop_folio: + return result; + } + ++/* Can we retract page tables for this file-backed VMA? */ ++static bool file_backed_vma_is_retractable(struct vm_area_struct *vma) ++{ ++ /* ++ * Check vma->anon_vma to exclude MAP_PRIVATE mappings that ++ * got written to. These VMAs are likely not worth removing ++ * page tables from, as PMD-mapping is likely to be split later. 
++ */ ++ if (READ_ONCE(vma->anon_vma)) ++ return false; ++ ++ /* ++ * When a vma is registered with uffd-wp, we cannot recycle ++ * the page table because there may be pte markers installed. ++ * Other vmas can still have the same file mapped hugely, but ++ * skip this one: it will always be mapped in small page size ++ * for uffd-wp registered ranges. ++ */ ++ if (userfaultfd_wp(vma)) ++ return false; ++ ++ /* ++ * If the VMA contains guard regions then we can't collapse it. ++ * ++ * This is set atomically on guard marker installation under mmap/VMA ++ * read lock, and here we may not hold any VMA or mmap lock at all. ++ * ++ * This is therefore serialised on the PTE page table lock, which is ++ * obtained on guard region installation after the flag is set, so this ++ * check being performed under this lock excludes races. ++ */ ++ if (vma_flag_test_atomic(vma, VM_MAYBE_GUARD_BIT)) ++ return false; ++ ++ return true; ++} ++ + static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + { + struct vm_area_struct *vma; +@@ -1729,14 +1766,6 @@ static void retract_page_tables(struct a + spinlock_t *ptl; + bool success = false; + +- /* +- * Check vma->anon_vma to exclude MAP_PRIVATE mappings that +- * got written to. These VMAs are likely not worth removing +- * page tables from, as PMD-mapping is likely to be split later. +- */ +- if (READ_ONCE(vma->anon_vma)) +- continue; +- + addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + if (addr & ~HPAGE_PMD_MASK || + vma->vm_end < addr + HPAGE_PMD_SIZE) +@@ -1748,14 +1777,8 @@ static void retract_page_tables(struct a + + if (hpage_collapse_test_exit(mm)) + continue; +- /* +- * When a vma is registered with uffd-wp, we cannot recycle +- * the page table because there may be pte markers installed. +- * Other vmas can still have the same file mapped hugely, but +- * skip this one: it will always be mapped in small page size +- * for uffd-wp registered ranges. 
+- */ +- if (userfaultfd_wp(vma)) ++ ++ if (!file_backed_vma_is_retractable(vma)) + continue; + + /* PTEs were notified when unmapped; but now for the PMD? */ +@@ -1782,15 +1805,15 @@ static void retract_page_tables(struct a + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); + + /* +- * Huge page lock is still held, so normally the page table +- * must remain empty; and we have already skipped anon_vma +- * and userfaultfd_wp() vmas. But since the mmap_lock is not +- * held, it is still possible for a racing userfaultfd_ioctl() +- * to have inserted ptes or markers. Now that we hold ptlock, +- * repeating the anon_vma check protects from one category, +- * and repeating the userfaultfd_wp() check from another. ++ * Huge page lock is still held, so normally the page table must ++ * remain empty; and we have already skipped anon_vma and ++ * userfaultfd_wp() vmas. But since the mmap_lock is not held, ++ * it is still possible for a racing userfaultfd_ioctl() or ++ * madvise() to have inserted ptes or markers. Now that we hold ++ * ptlock, repeating the retractable checks protects us from ++ * races against the prior checks. + */ +- if (likely(!vma->anon_vma && !userfaultfd_wp(vma))) { ++ if (likely(file_backed_vma_is_retractable(vma))) { + pgt_pmd = pmdp_collapse_flush(vma, addr, pmd); + pmdp_get_lockless_sync(); + success = true; +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -1141,15 +1141,21 @@ static long madvise_guard_install(struct + return -EINVAL; + + /* +- * If we install guard markers, then the range is no longer +- * empty from a page table perspective and therefore it's +- * appropriate to have an anon_vma. +- * +- * This ensures that on fork, we copy page tables correctly. ++ * Set atomically under read lock. All pertinent readers will need to ++ * acquire an mmap/VMA write lock to read it. All remaining readers may ++ * or may not see the flag set, but we don't care. 
+ */ +- err = anon_vma_prepare(vma); +- if (err) +- return err; ++ vma_flag_set_atomic(vma, VM_MAYBE_GUARD_BIT); ++ ++ /* ++ * If anonymous and we are establishing page tables the VMA ought to ++ * have an anon_vma associated with it. ++ */ ++ if (vma_is_anonymous(vma)) { ++ err = anon_vma_prepare(vma); ++ if (err) ++ return err; ++ } + + /* + * Optimistically try to install the guard marker pages first. If any diff --git a/queue-6.18/mm-update-vma_modify_flags-to-handle-residual-flags-document.patch b/queue-6.18/mm-update-vma_modify_flags-to-handle-residual-flags-document.patch new file mode 100644 index 0000000000..c2aff3080e --- /dev/null +++ b/queue-6.18/mm-update-vma_modify_flags-to-handle-residual-flags-document.patch @@ -0,0 +1,375 @@ +From stable+bounces-247750-greg=kroah.com@vger.kernel.org Fri May 15 14:05:39 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:13 +0300 +Subject: mm: update vma_modify_flags() to handle residual flags, document +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , Pedro Falcato , Vlastimil Babka , Baolin Wang , Barry Song , "David Hildenbrand (Red Hat)" , Dev Jain , Jann Horn , Jonathan Corbet , Lance Yang , Liam Howlett , "Masami Hiramatsu (Google)" , Mathieu Desnoyers , Michal Hocko , Mike Rapoport , Nico Pache , Ryan Roberts , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Ahmed Elaidy +Message-ID: <20260515124218.151966-5-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit 9119d6c2095bb20292cb9812dd70d37f17e3bd37 upstream. + +The vma_modify_*() family of functions each either perform splits, a merge +or no changes at all in preparation for the requested modification to +occur. + +When doing so for a VMA flags change, we currently don't account for any +flags which may remain (for instance, VM_SOFTDIRTY) despite the requested +change in the case that a merge succeeded. 
+ +This is made more important by subsequent patches which will introduce the +concept of sticky VMA flags which rely on this behaviour. + +This patch fixes this by passing the VMA flags parameter as a pointer and +updating it accordingly on merge and updating callers to accommodate for +this. + +Additionally, while we are here, we add kdocs for each of the +vma_modify_*() functions, as the fact that the requested modification is +not performed is confusing so it is useful to make this abundantly clear. + +We also update the VMA userland tests to account for this change. + +Link: https://lkml.kernel.org/r/23b5b549b0eaefb2922625626e58c2a352f3e93c.1763460113.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Reviewed-by: Pedro Falcato +Reviewed-by: Vlastimil Babka +Cc: Andrei Vagin +Cc: Baolin Wang +Cc: Barry Song +Cc: David Hildenbrand (Red Hat) +Cc: Dev Jain +Cc: Jann Horn +Cc: Jonathan Corbet +Cc: Lance Yang +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Mathieu Desnoyers +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + mm/madvise.c | 2 + mm/mlock.c | 2 + mm/mprotect.c | 2 + mm/mseal.c | 7 +- + mm/vma.c | 56 ++++++++++--------- + mm/vma.h | 138 +++++++++++++++++++++++++++++++++++------------- + tools/testing/vma/vma.c | 3 - + 7 files changed, 142 insertions(+), 68 deletions(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -167,7 +167,7 @@ static int madvise_update_vma(vm_flags_t + range->start, range->end, anon_name); + else + vma = vma_modify_flags(&vmi, madv_behavior->prev, vma, +- range->start, range->end, new_flags); ++ range->start, range->end, &new_flags); + + if (IS_ERR(vma)) + return PTR_ERR(vma); +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -480,7 +480,7 @@ static int mlock_fixup(struct vma_iterat + */ + goto out; + +- vma = vma_modify_flags(vmi, *prev, vma, start, end, 
newflags); ++ vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -813,7 +813,7 @@ mprotect_fixup(struct vma_iterator *vmi, + newflags &= ~VM_ACCOUNT; + } + +- vma = vma_modify_flags(vmi, *pprev, vma, start, end, newflags); ++ vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags); + if (IS_ERR(vma)) { + error = PTR_ERR(vma); + goto fail; +--- a/mm/mseal.c ++++ b/mm/mseal.c +@@ -69,9 +69,10 @@ static int mseal_apply(struct mm_struct + const unsigned long curr_end = MIN(vma->vm_end, end); + + if (!(vma->vm_flags & VM_SEALED)) { +- vma = vma_modify_flags(&vmi, prev, vma, +- curr_start, curr_end, +- vma->vm_flags | VM_SEALED); ++ vm_flags_t vm_flags = vma->vm_flags | VM_SEALED; ++ ++ vma = vma_modify_flags(&vmi, prev, vma, curr_start, ++ curr_end, &vm_flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + vm_flags_set(vma, VM_SEALED); +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -1676,25 +1676,35 @@ static struct vm_area_struct *vma_modify + return vma; + } + +-struct vm_area_struct *vma_modify_flags( +- struct vma_iterator *vmi, struct vm_area_struct *prev, +- struct vm_area_struct *vma, unsigned long start, unsigned long end, +- vm_flags_t vm_flags) ++struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ vm_flags_t *vm_flags_ptr) + { + VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); ++ const vm_flags_t vm_flags = *vm_flags_ptr; ++ struct vm_area_struct *ret; + + vmg.vm_flags = vm_flags; + +- return vma_modify(&vmg); ++ ret = vma_modify(&vmg); ++ if (IS_ERR(ret)) ++ return ret; ++ ++ /* ++ * For a merge to succeed, the flags must match those requested. For ++ * flags which do not obey typical merge rules (i.e. do not need to ++ * match), we must let the caller know about them. 
++ */ ++ if (vmg.state == VMA_MERGE_SUCCESS) ++ *vm_flags_ptr = ret->vm_flags; ++ return ret; + } + +-struct vm_area_struct +-*vma_modify_name(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, +- unsigned long start, +- unsigned long end, +- struct anon_vma_name *new_name) ++struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct anon_vma_name *new_name) + { + VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); + +@@ -1703,12 +1713,10 @@ struct vm_area_struct + return vma_modify(&vmg); + } + +-struct vm_area_struct +-*vma_modify_policy(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, +- unsigned long start, unsigned long end, +- struct mempolicy *new_pol) ++struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct mempolicy *new_pol) + { + VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); + +@@ -1717,14 +1725,10 @@ struct vm_area_struct + return vma_modify(&vmg); + } + +-struct vm_area_struct +-*vma_modify_flags_uffd(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, +- unsigned long start, unsigned long end, +- vm_flags_t vm_flags, +- struct vm_userfaultfd_ctx new_ctx, +- bool give_up_on_oom) ++struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, vm_flags_t vm_flags, ++ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom) + { + VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); + +--- a/mm/vma.h ++++ b/mm/vma.h +@@ -266,47 +266,115 @@ void remove_vma(struct vm_area_struct *v + void unmap_region(struct ma_state *mas, struct vm_area_struct *vma, + struct vm_area_struct *prev, struct vm_area_struct 
*next); + +-/* We are about to modify the VMA's flags. */ +-__must_check struct vm_area_struct +-*vma_modify_flags(struct vma_iterator *vmi, ++/** ++ * vma_modify_flags() - Perform any necessary split/merge in preparation for ++ * setting VMA flags to *@vm_flags_ptr in the range @start to @end contained within ++ * @vma. ++ * @vmi: Valid VMA iterator positioned at @vma. ++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first. ++ * @vma: The VMA containing the range @start to @end to be updated. ++ * @start: The start of the range to update. May be offset within @vma. ++ * @end: The exclusive end of the range to update, may be offset within @vma. ++ * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is ++ * about to be set to. On merge, this will be updated to include any additional ++ * flags which remain in place. ++ * ++ * IMPORTANT: The actual modification being requested here is NOT applied, ++ * rather the VMA is perhaps split, perhaps merged to accommodate the change, ++ * and the caller is expected to perform the actual modification. ++ * ++ * In order to account for VMA flags which may persist (e.g. soft-dirty), the ++ * @vm_flags_ptr parameter points to the requested flags which are then updated ++ * so the caller, should they overwrite any existing flags, correctly retains ++ * these. ++ * ++ * Returns: A VMA which contains the range @start to @end ready to have its ++ * flags altered to *@vm_flags_ptr. ++ */ ++__must_check struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi, + struct vm_area_struct *prev, struct vm_area_struct *vma, + unsigned long start, unsigned long end, +- vm_flags_t vm_flags); ++ vm_flags_t *vm_flags_ptr); + +-/* We are about to modify the VMA's anon_name. 
*/ +-__must_check struct vm_area_struct +-*vma_modify_name(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, +- unsigned long start, +- unsigned long end, +- struct anon_vma_name *new_name); +- +-/* We are about to modify the VMA's memory policy. */ +-__must_check struct vm_area_struct +-*vma_modify_policy(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, ++/** ++ * vma_modify_name() - Perform any necessary split/merge in preparation for ++ * setting anonymous VMA name to @new_name in the range @start to @end contained ++ * within @vma. ++ * @vmi: Valid VMA iterator positioned at @vma. ++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first. ++ * @vma: The VMA containing the range @start to @end to be updated. ++ * @start: The start of the range to update. May be offset within @vma. ++ * @end: The exclusive end of the range to update, may be offset within @vma. ++ * @new_name: The anonymous VMA name that the @start to @end range is about to ++ * be set to. ++ * ++ * IMPORTANT: The actual modification being requested here is NOT applied, ++ * rather the VMA is perhaps split, perhaps merged to accommodate the change, ++ * and the caller is expected to perform the actual modification. ++ * ++ * Returns: A VMA which contains the range @start to @end ready to have its ++ * anonymous VMA name changed to @new_name. ++ */ ++__must_check struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct anon_vma_name *new_name); ++ ++/** ++ * vma_modify_policy() - Perform any necessary split/merge in preparation for ++ * setting NUMA policy to @new_pol in the range @start to @end contained ++ * within @vma. ++ * @vmi: Valid VMA iterator positioned at @vma. ++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first. 
++ * @vma: The VMA containing the range @start to @end to be updated. ++ * @start: The start of the range to update. May be offset within @vma. ++ * @end: The exclusive end of the range to update, may be offset within @vma. ++ * @new_pol: The NUMA policy that the @start to @end range is about to be set ++ * to. ++ * ++ * IMPORTANT: The actual modification being requested here is NOT applied, ++ * rather the VMA is perhaps split, perhaps merged to accommodate the change, ++ * and the caller is expected to perform the actual modification. ++ * ++ * Returns: A VMA which contains the range @start to @end ready to have its ++ * NUMA policy changed to @new_pol. ++ */ ++__must_check struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct mempolicy *new_pol); + +-/* We are about to modify the VMA's flags and/or uffd context. */ +-__must_check struct vm_area_struct +-*vma_modify_flags_uffd(struct vma_iterator *vmi, +- struct vm_area_struct *prev, +- struct vm_area_struct *vma, +- unsigned long start, unsigned long end, +- vm_flags_t vm_flags, +- struct vm_userfaultfd_ctx new_ctx, +- bool give_up_on_oom); +- +-__must_check struct vm_area_struct +-*vma_merge_new_range(struct vma_merge_struct *vmg); +- +-__must_check struct vm_area_struct +-*vma_merge_extend(struct vma_iterator *vmi, +- struct vm_area_struct *vma, +- unsigned long delta); ++/** ++ * vma_modify_flags_uffd() - Perform any necessary split/merge in preparation for ++ * setting VMA flags to @vm_flags and UFFD context to @new_ctx in the range ++ * @start to @end contained within @vma. ++ * @vmi: Valid VMA iterator positioned at @vma. ++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first. ++ * @vma: The VMA containing the range @start to @end to be updated. ++ * @start: The start of the range to update. May be offset within @vma. 
++ * @end: The exclusive end of the range to update, may be offset within @vma. ++ * @vm_flags: The VMA flags that the @start to @end range is about to be set to. ++ * @new_ctx: The userfaultfd context that the @start to @end range is about to ++ * be set to. ++ * @give_up_on_oom: If an out of memory condition occurs on merge, simply give ++ * up on it and treat the merge as best-effort. ++ * ++ * IMPORTANT: The actual modification being requested here is NOT applied, ++ * rather the VMA is perhaps split, perhaps merged to accommodate the change, ++ * and the caller is expected to perform the actual modification. ++ * ++ * Returns: A VMA which contains the range @start to @end ready to have its VMA ++ * flags changed to @vm_flags and its userfaultfd context changed to @new_ctx. ++ */ ++__must_check struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi, ++ struct vm_area_struct *prev, struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, vm_flags_t vm_flags, ++ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom); ++ ++__must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg); ++ ++__must_check struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, ++ struct vm_area_struct *vma, unsigned long delta); + + void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb); + +--- a/tools/testing/vma/vma.c ++++ b/tools/testing/vma/vma.c +@@ -339,6 +339,7 @@ static bool test_simple_modify(void) + struct mm_struct mm = {}; + struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); + VMA_ITERATOR(vmi, &mm, 0x1000); ++ vm_flags_t flags = VM_READ | VM_MAYREAD; + + ASSERT_FALSE(attach_vma(&mm, init_vma)); + +@@ -347,7 +348,7 @@ static bool test_simple_modify(void) + * performs the merge/split only. 
+ */ + vma = vma_modify_flags(&vmi, init_vma, init_vma, +- 0x1000, 0x2000, VM_READ | VM_MAYREAD); ++ 0x1000, 0x2000, &flags); + ASSERT_NE(vma, NULL); + /* We modify the provided VMA, and on split allocate new VMAs. */ + ASSERT_EQ(vma, init_vma); diff --git a/queue-6.18/series b/queue-6.18/series index 53d62583b1..af05470989 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -31,3 +31,11 @@ hv-utils-handle-and-propagate-errors-in-kvp_register.patch drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch firmware-samsung-acpm-fix-cross-thread-rx-length-corruption.patch sctp-disable-bh-before-calling-udp_tunnel_xmit_skb.patch +mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch +mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch +mm-update-vma_modify_flags-to-handle-residual-flags-document.patch +mm-implement-sticky-vma-flags.patch +mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch +mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch +mm-propagate-vm_softdirty-on-merge.patch +testing-selftests-mm-add-soft-dirty-merge-self-test.patch diff --git a/queue-6.18/testing-selftests-mm-add-soft-dirty-merge-self-test.patch b/queue-6.18/testing-selftests-mm-add-soft-dirty-merge-self-test.patch new file mode 100644 index 0000000000..908ca4ebb7 --- /dev/null +++ b/queue-6.18/testing-selftests-mm-add-soft-dirty-merge-self-test.patch @@ -0,0 +1,198 @@ +From stable+bounces-247756-greg=kroah.com@vger.kernel.org Fri May 15 14:06:06 2026 +From: Ahmed Elaidy +Date: Fri, 15 May 2026 15:42:19 +0300 +Subject: testing/selftests/mm: add soft-dirty merge self-test +To: stable@vger.kernel.org +Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes , "David Hildenbrand (Red Hat)" , Jann Horn , Liam Howlett , Michal Hocko , Mike Rapoport , Pedro Falcato , Suren Baghdasaryan , Vlastimil Babka , Cyrill Gorcunov , Ahmed Elaidy +Message-ID: 
<20260515124218.151966-11-elaidya225@gmail.com> + +From: Lorenzo Stoakes + +commit c7ba92bcfea34f6b4afc744c3b65c8f7420fefe0 upstream. + +Assert that we correctly merge VMAs containing VM_SOFTDIRTY flags now that +we correctly handle these as sticky. + +In order to do so, we have to account for the fact the pagemap interface +checks soft dirty PTEs and additionally that newly merged VMAs are marked +VM_SOFTDIRTY. + +We do this by using unfaulted anon VMAs, establishing one and clearing +references on that one, before establishing another and merging the two +before checking that soft-dirty is propagated as expected. + +We check that this functions correctly with mremap() and mprotect() as +sample cases, because VMA merge of adjacent newly mapped VMAs will +automatically be made soft-dirty due to existing logic which does so. + +We are therefore exercising other means of merging VMAs. + +Link: https://lkml.kernel.org/r/d5a0f735783fb4f30a604f570ede02ccc5e29be9.1763399675.git.ljs@kernel.org +Signed-off-by: Lorenzo Stoakes +Cc: Andrey Vagin +Cc: David Hildenbrand (Red Hat) +Cc: Jann Horn +Cc: Liam Howlett +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Pedro Falcato +Cc: Suren Baghdasaryan +Cc: Vlastimil Babka +Cc: Cyrill Gorcunov +Signed-off-by: Andrew Morton +Signed-off-by: Ahmed Elaidy +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/soft-dirty.c | 127 +++++++++++++++++++++++++++++++- + 1 file changed, 126 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/mm/soft-dirty.c ++++ b/tools/testing/selftests/mm/soft-dirty.c +@@ -184,6 +184,130 @@ static void test_mprotect(int pagemap_fd + close(test_fd); + } + ++static void test_merge(int pagemap_fd, int pagesize) ++{ ++ char *reserved, *map, *map2; ++ ++ /* ++ * Reserve space for tests: ++ * ++ * ---padding to --- ++ * | avoid adj. 
| ++ * v merge v ++ * |---|---|---|---|---| ++ * | | 1 | 2 | 3 | | ++ * |---|---|---|---|---| ++ */ ++ reserved = mmap(NULL, 5 * pagesize, PROT_NONE, ++ MAP_ANON | MAP_PRIVATE, -1, 0); ++ if (reserved == MAP_FAILED) ++ ksft_exit_fail_msg("mmap failed\n"); ++ munmap(reserved, 4 * pagesize); ++ ++ /* ++ * Establish initial VMA: ++ * ++ * S/D ++ * |---|---|---|---|---| ++ * | | 1 | | | | ++ * |---|---|---|---|---| ++ */ ++ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE, ++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); ++ if (map == MAP_FAILED) ++ ksft_exit_fail_msg("mmap failed\n"); ++ ++ /* This will clear VM_SOFTDIRTY too. */ ++ clear_softdirty(); ++ ++ /* ++ * Now place a new mapping which will be marked VM_SOFTDIRTY. Away from ++ * map: ++ * ++ * - S/D ++ * |---|---|---|---|---| ++ * | | 1 | | 2 | | ++ * |---|---|---|---|---| ++ */ ++ map2 = mmap(&reserved[3 * pagesize], pagesize, PROT_READ | PROT_WRITE, ++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); ++ if (map2 == MAP_FAILED) ++ ksft_exit_fail_msg("mmap failed\n"); ++ ++ /* ++ * Now remap it immediately adjacent to map, if the merge correctly ++ * propagates VM_SOFTDIRTY, we should then observe the VMA as a whole ++ * being marked soft-dirty: ++ * ++ * merge ++ * S/D ++ * |---|-------|---|---| ++ * | | 1 | | | ++ * |---|-------|---|---| ++ */ ++ map2 = mremap(map2, pagesize, pagesize, MREMAP_FIXED | MREMAP_MAYMOVE, ++ &reserved[2 * pagesize]); ++ if (map2 == MAP_FAILED) ++ ksft_exit_fail_msg("mremap failed\n"); ++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, ++ "Test %s-anon soft-dirty after remap merge 1st pg\n", ++ __func__); ++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1, ++ "Test %s-anon soft-dirty after remap merge 2nd pg\n", ++ __func__); ++ ++ munmap(map, 2 * pagesize); ++ ++ /* ++ * Now establish another VMA: ++ * ++ * S/D ++ * |---|---|---|---|---| ++ * | | 1 | | | | ++ * |---|---|---|---|---| ++ */ ++ map = mmap(&reserved[pagesize], pagesize, PROT_READ | 
PROT_WRITE, ++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); ++ if (map == MAP_FAILED) ++ ksft_exit_fail_msg("mmap failed\n"); ++ ++ /* Clear VM_SOFTDIRTY... */ ++ clear_softdirty(); ++ /* ...and establish incompatible adjacent VMA: ++ * ++ * - S/D ++ * |---|---|---|---|---| ++ * | | 1 | 2 | | | ++ * |---|---|---|---|---| ++ */ ++ map2 = mmap(&reserved[2 * pagesize], pagesize, ++ PROT_READ | PROT_WRITE | PROT_EXEC, ++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); ++ if (map2 == MAP_FAILED) ++ ksft_exit_fail_msg("mmap failed\n"); ++ ++ /* ++ * Now mprotect() VMA 1 so it's compatible with 2 and therefore merges: ++ * ++ * merge ++ * S/D ++ * |---|-------|---|---| ++ * | | 1 | | | ++ * |---|-------|---|---| ++ */ ++ if (mprotect(map, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC)) ++ ksft_exit_fail_msg("mprotect failed\n"); ++ ++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, ++ "Test %s-anon soft-dirty after mprotect merge 1st pg\n", ++ __func__); ++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1, ++ "Test %s-anon soft-dirty after mprotect merge 2nd pg\n", ++ __func__); ++ ++ munmap(map, 2 * pagesize); ++} ++ + static void test_mprotect_anon(int pagemap_fd, int pagesize) + { + test_mprotect(pagemap_fd, pagesize, true); +@@ -204,7 +328,7 @@ int main(int argc, char **argv) + if (!softdirty_supported()) + ksft_exit_skip("soft-dirty is not support\n"); + +- ksft_set_plan(15); ++ ksft_set_plan(19); + pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); +@@ -216,6 +340,7 @@ int main(int argc, char **argv) + test_hugepage(pagemap_fd, pagesize); + test_mprotect_anon(pagemap_fd, pagesize); + test_mprotect_file(pagemap_fd, pagesize); ++ test_merge(pagemap_fd, pagesize); + + close(pagemap_fd); +