--- /dev/null
+From stable+bounces-247749-greg=kroah.com@vger.kernel.org Fri May 15 14:05:33 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:12 +0300
+Subject: mm: add atomic VMA flags and set VM_MAYBE_GUARD as such
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Pedro Falcato <pfalcato@suse.de>, Vlastimil Babka <vbabka@suse.cz>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Lance Yang <lance.yang@linux.dev>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-4-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 568822502383acd57d7cc1c72ee43932c45a9524 upstream.
+
+This patch adds the ability to atomically set VMA flags with only the mmap
+read/VMA read lock held.
+
+As this could be hugely problematic for VMA flags in general given that
+all other accesses are non-atomic and serialised by the mmap/VMA locks, we
+implement this with a strict allow-list - that is, only designated flags
+are allowed to do this.
+
+We make VM_MAYBE_GUARD one of these flags.
+
+Link: https://lkml.kernel.org/r/97e57abed09f2663077ed7a36fb8206e243171a9.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
+Reviewed-by: Lance Yang <lance.yang@linux.dev>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -501,6 +501,9 @@ extern unsigned int kobjsize(const void
+ /* This mask represents all the VMA flag bits used by mlock */
+ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
+
++/* These flags can be updated atomically via VMA/mmap read lock. */
++#define VM_ATOMIC_SET_ALLOWED VM_MAYBE_GUARD
++
+ /* Arch-specific flags to clear when updating VM flags on protection change */
+ #ifndef VM_ARCH_CLEAR
+ # define VM_ARCH_CLEAR VM_NONE
+@@ -843,6 +846,47 @@ static inline void vm_flags_mod(struct v
+ __vm_flags_mod(vma, set, clear);
+ }
+
++static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
++ int bit)
++{
++ const vm_flags_t mask = BIT(bit);
++
++ /* Only specific flags are permitted */
++ if (WARN_ON_ONCE(!(mask & VM_ATOMIC_SET_ALLOWED)))
++ return false;
++
++ return true;
++}
++
++/*
++ * Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
++ * valid flags are allowed to do this.
++ */
++static inline void vma_flag_set_atomic(struct vm_area_struct *vma, int bit)
++{
++ /* mmap read lock/VMA read lock must be held. */
++ if (!rwsem_is_locked(&vma->vm_mm->mmap_lock))
++ vma_assert_locked(vma);
++
++ if (__vma_flag_atomic_valid(vma, bit))
++ set_bit(bit, &ACCESS_PRIVATE(vma, __vm_flags));
++}
++
++/*
++ * Test for VMA flag atomically. Requires no locks. Only specific valid flags
++ * are allowed to do this.
++ *
++ * This is necessarily racey, so callers must ensure that serialisation is
++ * achieved through some other means, or that races are permissible.
++ */
++static inline bool vma_flag_test_atomic(struct vm_area_struct *vma, int bit)
++{
++ if (__vma_flag_atomic_valid(vma, bit))
++ return test_bit(bit, &vma->vm_flags);
++
++ return false;
++}
++
+ static inline void vma_set_anonymous(struct vm_area_struct *vma)
+ {
+ vma->vm_ops = NULL;
--- /dev/null
+From stable+bounces-247751-greg=kroah.com@vger.kernel.org Fri May 15 14:05:45 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:14 +0300
+Subject: mm: implement sticky VMA flags
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Pedro Falcato <pfalcato@suse.de>, Vlastimil Babka <vbabka@suse.cz>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-6-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 64212ba02e66e705cabce188453ba4e61e9d7325 upstream.
+
+It is useful to be able to designate that certain flags are 'sticky', that
+is, if two VMAs are merged one with a flag of this nature and one without,
+the merged VMA sets this flag.
+
+As a result we ignore these flags for the purposes of determining VMA flag
+differences between VMAs being considered for merge.
+
+This patch therefore updates the VMA merge logic to perform this action,
+with flags possessing this property being described in the VM_STICKY
+bitmap.
+
+Those flags which ought to be ignored for the purposes of VMA merge are
+described in the VM_IGNORE_MERGE bitmap, which the VMA merge logic is also
+updated to use.
+
+As part of this change we place VM_SOFTDIRTY in VM_IGNORE_MERGE as it
+already had this behaviour, alongside VM_STICKY as sticky flags by
+implication must not disallow merge.
+
+Ultimately it seems that we should make VM_SOFTDIRTY a sticky flag in its
+own right, but this change is out of scope for this series.
+
+The only sticky flag designated as such is VM_MAYBE_GUARD, so as a result
+of this change, once the VMA flag is set upon guard region installation,
+VMAs with guard ranges will now not have their merge behaviour impacted as
+a result and can be freely merged with other VMAs without VM_MAYBE_GUARD
+set.
+
+Also update the comments for vma_modify_flags() to directly reference
+sticky flags now we have established the concept.
+
+We also update the VMA userland tests to account for the changes.
+
+Link: https://lkml.kernel.org/r/22ad5269f7669d62afb42ce0c79bad70b994c58d.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand (Red Hat) <david@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 28 ++++++++++++++++++++++++++++
+ mm/vma.c | 31 +++++++++++++++++--------------
+ mm/vma.h | 10 ++++------
+ tools/testing/vma/vma_internal.h | 28 ++++++++++++++++++++++++++++
+ 4 files changed, 77 insertions(+), 20 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -511,6 +511,34 @@ extern unsigned int kobjsize(const void
+ #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)
+
+ /*
++ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
++ * possesses it but the other does not, the merged VMA should nonetheless have
++ * applied to it:
++ *
++ * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
++ * mapped page tables may contain metadata not described by the
++ * VMA and thus any merged VMA may also contain this metadata,
++ * and thus we must make this flag sticky.
++ */
++#define VM_STICKY VM_MAYBE_GUARD
++
++/*
++ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
++ * of these flags and the other not does not preclude a merge.
++ *
++ * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but
++ * dirty bit -- the caller should mark merged VMA as dirty. If
++ * dirty bit won't be excluded from comparison, we increase
++ * pressure on the memory system forcing the kernel to generate
++ * new VMAs when old one could be extended instead.
++ *
++ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
++ * 'sticky'. If any sticky flags exist in either VMA, we simply
++ * set all of them on the merged VMA.
++ */
++#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
++
++/*
+ * mapping from the currently active vm_flags protection bits (the
+ * low four bits) to a page protection mask..
+ */
+--- a/mm/vma.c
++++ b/mm/vma.c
+@@ -82,15 +82,7 @@ static inline bool is_mergeable_vma(stru
+
+ if (!mpol_equal(vmg->policy, vma_policy(vma)))
+ return false;
+- /*
+- * VM_SOFTDIRTY should not prevent from VMA merging, if we
+- * match the flags but dirty bit -- the caller should mark
+- * merged VMA as dirty. If dirty bit won't be excluded from
+- * comparison, we increase pressure on the memory system forcing
+- * the kernel to generate new VMAs when old one could be
+- * extended instead.
+- */
+- if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_SOFTDIRTY)
++ if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE)
+ return false;
+ if (vma->vm_file != vmg->file)
+ return false;
+@@ -810,6 +802,7 @@ static bool can_merge_remove_vma(struct
+ static __must_check struct vm_area_struct *vma_merge_existing_range(
+ struct vma_merge_struct *vmg)
+ {
++ vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY;
+ struct vm_area_struct *middle = vmg->middle;
+ struct vm_area_struct *prev = vmg->prev;
+ struct vm_area_struct *next;
+@@ -904,11 +897,13 @@ static __must_check struct vm_area_struc
+ if (merge_right) {
+ vma_start_write(next);
+ vmg->target = next;
++ sticky_flags |= (next->vm_flags & VM_STICKY);
+ }
+
+ if (merge_left) {
+ vma_start_write(prev);
+ vmg->target = prev;
++ sticky_flags |= (prev->vm_flags & VM_STICKY);
+ }
+
+ if (merge_both) {
+@@ -978,6 +973,7 @@ static __must_check struct vm_area_struc
+ if (err || commit_merge(vmg))
+ goto abort;
+
++ vm_flags_set(vmg->target, sticky_flags);
+ khugepaged_enter_vma(vmg->target, vmg->vm_flags);
+ vmg->state = VMA_MERGE_SUCCESS;
+ return vmg->target;
+@@ -1156,14 +1152,20 @@ int vma_expand(struct vma_merge_struct *
+ struct vm_area_struct *target = vmg->target;
+ struct vm_area_struct *next = vmg->next;
+ int ret = 0;
++ vm_flags_t sticky_flags;
++
++ sticky_flags = vmg->vm_flags & VM_STICKY;
++ sticky_flags |= target->vm_flags & VM_STICKY;
+
+ VM_WARN_ON_VMG(!target, vmg);
+
+ mmap_assert_write_locked(vmg->mm);
+ vma_start_write(target);
+
+- if (next && target != next && vmg->end == next->vm_end)
++ if (next && target != next && vmg->end == next->vm_end) {
++ sticky_flags |= next->vm_flags & VM_STICKY;
+ remove_next = true;
++ }
+
+ /* We must have a target. */
+ VM_WARN_ON_VMG(!target, vmg);
+@@ -1197,6 +1199,7 @@ int vma_expand(struct vma_merge_struct *
+ if (commit_merge(vmg))
+ goto nomem;
+
++ vm_flags_set(target, sticky_flags);
+ return 0;
+
+ nomem:
+@@ -1692,9 +1695,9 @@ struct vm_area_struct *vma_modify_flags(
+ return ret;
+
+ /*
+- * For a merge to succeed, the flags must match those requested. For
+- * flags which do not obey typical merge rules (i.e. do not need to
+- * match), we must let the caller know about them.
++ * For a merge to succeed, the flags must match those
++ * requested. However, sticky flags may have been retained, so propagate
++ * them to the caller.
+ */
+ if (vmg.state == VMA_MERGE_SUCCESS)
+ *vm_flags_ptr = ret->vm_flags;
+@@ -1959,7 +1962,7 @@ static int anon_vma_compatible(struct vm
+ return a->vm_end == b->vm_start &&
+ mpol_equal(vma_policy(a), vma_policy(b)) &&
+ a->vm_file == b->vm_file &&
+- !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
++ !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) &&
+ b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+ }
+
+--- a/mm/vma.h
++++ b/mm/vma.h
+@@ -276,17 +276,15 @@ void unmap_region(struct ma_state *mas,
+ * @start: The start of the range to update. May be offset within @vma.
+ * @end: The exclusive end of the range to update, may be offset within @vma.
+ * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is
+- * about to be set to. On merge, this will be updated to include any additional
+- * flags which remain in place.
++ * about to be set to. On merge, this will be updated to include sticky flags.
+ *
+ * IMPORTANT: The actual modification being requested here is NOT applied,
+ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
+ * and the caller is expected to perform the actual modification.
+ *
+- * In order to account for VMA flags which may persist (e.g. soft-dirty), the
+- * @vm_flags_ptr parameter points to the requested flags which are then updated
+- * so the caller, should they overwrite any existing flags, correctly retains
+- * these.
++ * In order to account for sticky VMA flags, the @vm_flags_ptr parameter points
++ * to the requested flags which are then updated so the caller, should they
++ * overwrite any existing flags, correctly retains these.
+ *
+ * Returns: A VMA which contains the range @start to @end ready to have its
+ * flags altered to *@vm_flags.
+--- a/tools/testing/vma/vma_internal.h
++++ b/tools/testing/vma/vma_internal.h
+@@ -117,6 +117,34 @@ extern unsigned long dac_mmap_min_addr;
+ #define VM_SEALED VM_NONE
+ #endif
+
++/*
++ * Flags which should be 'sticky' on merge - that is, flags which, when one VMA
++ * possesses it but the other does not, the merged VMA should nonetheless have
++ * applied to it:
++ *
++ * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
++ * mapped page tables may contain metadata not described by the
++ * VMA and thus any merged VMA may also contain this metadata,
++ * and thus we must make this flag sticky.
++ */
++#define VM_STICKY VM_MAYBE_GUARD
++
++/*
++ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
++ * of these flags and the other not does not preclude a merge.
++ *
++ * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but
++ * dirty bit -- the caller should mark merged VMA as dirty. If
++ * dirty bit won't be excluded from comparison, we increase
++ * pressure on the memory system forcing the kernel to generate
++ * new VMAs when old one could be extended instead.
++ *
++ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
++ * 'sticky'. If any sticky flags exist in either VMA, we simply
++ * set all of them on the merged VMA.
++ */
++#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
++
+ #define FIRST_USER_ADDRESS 0UL
+ #define USER_PGTABLES_CEILING 0UL
+
--- /dev/null
+From stable+bounces-247752-greg=kroah.com@vger.kernel.org Fri May 15 14:05:45 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:15 +0300
+Subject: mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Pedro Falcato <pfalcato@suse.de>, Vlastimil Babka <vbabka@suse.cz>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-7-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit ab04b530e7e8bd5cf9fb0c1ad20e0deee8f569ec upstream.
+
+Gather all the VMA flags whose presence implies that page tables must be
+copied on fork into a single bitmap - VM_COPY_ON_FORK - and use this
+rather than specifying individual flags in vma_needs_copy().
+
+We also add VM_MAYBE_GUARD to this list, as it being set on a VMA implies
+that there may be metadata contained in the page tables (that is - guard
+markers) which will not and cannot be propagated upon fork.
+
+This was already being done manually previously in vma_needs_copy(), but
+this makes it very explicit, alongside VM_PFNMAP, VM_MIXEDMAP and
+VM_UFFD_WP all of which imply the same.
+
+Note that VM_STICKY flags ought generally to be marked VM_COPY_ON_FORK too
+- because equally a flag being VM_STICKY indicates that the VMA contains
+metadata that is not propagated by being faulted in - i.e. that the VMA
+metadata does not fully describe the VMA alone, and thus we must propagate
+whatever metadata there is on a fork.
+
+However, for maximum flexibility, we do not make this necessarily the case
+here.
+
+Link: https://lkml.kernel.org/r/5d41b24e7bc622cda0af92b6d558d7f4c0d1bc8c.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 26 ++++++++++++++++++++++++++
+ mm/memory.c | 18 ++++--------------
+ tools/testing/vma/vma_internal.h | 26 ++++++++++++++++++++++++++
+ 3 files changed, 56 insertions(+), 14 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -539,6 +539,32 @@ extern unsigned int kobjsize(const void
+ #define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
+
+ /*
++ * Flags which should result in page tables being copied on fork. These are
++ * flags which indicate that the VMA maps page tables which cannot be
++ * reconstituted upon page fault, so necessitate page table copying upon fork:
++ *
++ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
++ * reasonably reconstructed on page fault.
++ *
++ * VM_UFFD_WP - Encodes metadata about an installed uffd
++ * write protect handler, which cannot be
++ * reconstructed on page fault.
++ *
++ * We always copy pgtables when dst_vma has uffd-wp
++ * enabled even if it's file-backed
++ * (e.g. shmem). Because when uffd-wp is enabled,
++ * pgtable contains uffd-wp protection information,
++ * that's something we can't retrieve from page cache,
++ * and skip copying will lose those info.
++ *
++ * VM_MAYBE_GUARD - Could contain page guard region markers which
++ * by design are a property of the page tables
++ * only and thus cannot be reconstructed on page
++ * fault.
++ */
++#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
++
++/*
+ * mapping from the currently active vm_flags protection bits (the
+ * low four bits) to a page protection mask..
+ */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1479,25 +1479,15 @@ copy_p4d_range(struct vm_area_struct *ds
+ static bool
+ vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+ {
++ if (src_vma->vm_flags & VM_COPY_ON_FORK)
++ return true;
+ /*
+- * Always copy pgtables when dst_vma has uffd-wp enabled even if it's
+- * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable
+- * contains uffd-wp protection information, that's something we can't
+- * retrieve from page cache, and skip copying will lose those info.
++ * The presence of an anon_vma indicates an anonymous VMA has page
++ * tables which naturally cannot be reconstituted on page fault.
+ */
+- if (userfaultfd_wp(dst_vma))
+- return true;
+-
+- if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+- return true;
+-
+ if (src_vma->anon_vma)
+ return true;
+
+- /* Guard regions have modified page tables that require copying. */
+- if (src_vma->vm_flags & VM_MAYBE_GUARD)
+- return true;
+-
+ /*
+ * Don't copy ptes where a page fault will fill them correctly. Fork
+ * becomes much lighter when there are big shared or private readonly
+--- a/tools/testing/vma/vma_internal.h
++++ b/tools/testing/vma/vma_internal.h
+@@ -145,6 +145,32 @@ extern unsigned long dac_mmap_min_addr;
+ */
+ #define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
+
++/*
++ * Flags which should result in page tables being copied on fork. These are
++ * flags which indicate that the VMA maps page tables which cannot be
++ * reconstituted upon page fault, so necessitate page table copying upon fork:
++ *
++ * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
++ * reasonably reconstructed on page fault.
++ *
++ * VM_UFFD_WP - Encodes metadata about an installed uffd
++ * write protect handler, which cannot be
++ * reconstructed on page fault.
++ *
++ * We always copy pgtables when dst_vma has uffd-wp
++ * enabled even if it's file-backed
++ * (e.g. shmem). Because when uffd-wp is enabled,
++ * pgtable contains uffd-wp protection information,
++ * that's something we can't retrieve from page cache,
++ * and skip copying will lose those info.
++ *
++ * VM_MAYBE_GUARD - Could contain page guard region markers which
++ * by design are a property of the page tables
++ * only and thus cannot be reconstructed on page
++ * fault.
++ */
++#define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
++
+ #define FIRST_USER_ADDRESS 0UL
+ #define USER_PGTABLES_CEILING 0UL
+
--- /dev/null
+From stable+bounces-247748-greg=kroah.com@vger.kernel.org Fri May 15 14:05:27 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:11 +0300
+Subject: mm: introduce VM_MAYBE_GUARD and make visible in /proc/$pid/smaps
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Pedro Falcato <pfalcato@suse.de>, Vlastimil Babka <vbabka@suse.cz>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Lance Yang <lance.yang@linux.dev>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-3-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 5dba5cc2e0ffa76f2f6c8922a04469dc9602c396 upstream.
+
+Patch series "introduce VM_MAYBE_GUARD and make it sticky", v4.
+
+Currently, guard regions are not visible to users except through
+/proc/$pid/pagemap, with no explicit visibility at the VMA level.
+
+This makes the feature less useful, as it isn't entirely apparent which
+VMAs may have these entries present, especially when performing actions
+which walk through memory regions such as those performed by CRIU.
+
+This series addresses this issue by introducing the VM_MAYBE_GUARD flag
+which fulfils this role, updating the smaps logic to display an entry for
+these.
+
+The semantics of this flag are that a guard region MAY be present if set
+(we cannot be sure, as we can't efficiently track whether an
+MADV_GUARD_REMOVE finally removes all the guard regions in a VMA) - but if
+not set the VMA definitely does NOT have any guard regions present.
+
+It's problematic to establish this flag without further action, because
+that means that VMAs with guard regions in them become non-mergeable with
+adjacent VMAs for no especially good reason.
+
+To work around this, this series also introduces the concept of 'sticky'
+VMA flags - that is flags which:
+
+a. if set in one VMA and not in another still permit those VMAs to be
+ merged (if otherwise compatible).
+
+b. When they are merged, the resultant VMA must have the flag set.
+
+The VMA logic is updated to propagate these flags correctly.
+
+Additionally, VM_MAYBE_GUARD being an explicit VMA flag allows us to solve
+an issue with file-backed guard regions - previously these established an
+anon_vma object for file-backed mappings solely to have vma_needs_copy()
+correctly propagate guard region mappings to child processes.
+
+We introduce a new flag alias VM_COPY_ON_FORK (which currently only
+specifies VM_MAYBE_GUARD) and update vma_needs_copy() to check explicitly
+for this flag and to copy page tables if it is present, which resolves
+this issue.
+
+Additionally, we add the ability for allow-listed VMA flags to be
+atomically writable with only mmap/VMA read locks held.
+
+The only flag we allow so far is VM_MAYBE_GUARD, which we carefully ensure
+does not cause any races by being allowed to do so.
+
+This allows us to maintain guard region installation as a read-locked
+operation and not endure the overhead of obtaining a write lock here.
+
+Finally we introduce extensive VMA userland tests to assert that the
+sticky VMA logic behaves correctly as well as guard region self tests to
+assert that smaps visibility is correctly implemented.
+
+This patch (of 9):
+
+Currently, if a user needs to determine if guard regions are present in a
+range, they have to scan all VMAs (or have knowledge of which ones might
+have guard regions).
+
+Since commit 8e2f2aeb8b48 ("fs/proc/task_mmu: add guard region bit to
+pagemap") and the related commit a516403787e0 ("fs/proc: extend the
+PAGEMAP_SCAN ioctl to report guard regions"), users can use either
+/proc/$pid/pagemap or the PAGEMAP_SCAN functionality to perform this
+operation at a virtual address level.
+
+This is not ideal, and it gives no visibility at a /proc/$pid/smaps level
+that guard regions exist in ranges.
+
+This patch remedies the situation by establishing a new VMA flag,
+VM_MAYBE_GUARD, to indicate that a VMA may contain guard regions (it is
+uncertain because we cannot reasonably determine whether a
+MADV_GUARD_REMOVE call has removed all of the guard regions in a VMA, and
+additionally VMAs may change across merge/split).
+
+We utilise 0x800 for this flag which makes it available to 32-bit
+architectures also, a flag that was previously used by VM_DENYWRITE, which
+was removed in commit 8d0920bde5eb ("mm: remove VM_DENYWRITE") and hasn't
+been reused yet.
+
+We also update the smaps logic and documentation to identify these VMAs.
+
+Another major use of this functionality is that we can use it to identify
+that we ought to copy page tables on fork.
+
+We do not actually implement usage of this flag in mm/madvise.c yet as we
+need to allow some VMA flags to be applied atomically under mmap/VMA read
+lock in order to avoid the need to acquire a write lock for this purpose.
+
+Link: https://lkml.kernel.org/r/cover.1763460113.git.ljs@kernel.org
+Link: https://lkml.kernel.org/r/cf8ef821eba29b6c5b5e138fffe95d6dcabdedb9.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
+Reviewed-by: Lance Yang <lance.yang@linux.dev>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/filesystems/proc.rst | 5 +++--
+ fs/proc/task_mmu.c | 1 +
+ include/linux/mm.h | 3 +++
+ include/trace/events/mmflags.h | 1 +
+ mm/memory.c | 4 ++++
+ tools/testing/vma/vma_internal.h | 1 +
+ 6 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/Documentation/filesystems/proc.rst
++++ b/Documentation/filesystems/proc.rst
+@@ -553,7 +553,7 @@ otherwise.
+ kernel flags associated with the particular virtual memory area in two letter
+ encoded manner. The codes are the following:
+
+- == =======================================
++ == =============================================================
+ rd readable
+ wr writeable
+ ex executable
+@@ -591,7 +591,8 @@ encoded manner. The codes are the follow
+ sl sealed
+ lf lock on fault pages
+ dp always lazily freeable mapping
+- == =======================================
++ gu maybe contains guard regions (if not set, definitely doesn't)
++ == =============================================================
+
+ Note that there is no guarantee that every flag and associated mnemonic will
+ be present in all further kernel releases. Things get changed, the flags may
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -1159,6 +1159,7 @@ static void show_smap_vma_flags(struct s
+ [ilog2(VM_MAYSHARE)] = "ms",
+ [ilog2(VM_GROWSDOWN)] = "gd",
+ [ilog2(VM_PFNMAP)] = "pf",
++ [ilog2(VM_MAYBE_GUARD)] = "gu",
+ [ilog2(VM_LOCKED)] = "lo",
+ [ilog2(VM_IO)] = "io",
+ [ilog2(VM_SEQ_READ)] = "sr",
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -269,6 +269,8 @@ extern struct rw_semaphore nommu_region_
+ extern unsigned int kobjsize(const void *objp);
+ #endif
+
++#define VM_MAYBE_GUARD_BIT 11
++
+ /*
+ * vm_flags in vm_area_struct, see mm_types.h.
+ * When changing, update also include/trace/events/mmflags.h
+@@ -294,6 +296,7 @@ extern unsigned int kobjsize(const void
+ #define VM_UFFD_MISSING 0
+ #endif /* CONFIG_MMU */
+ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
++#define VM_MAYBE_GUARD BIT(VM_MAYBE_GUARD_BIT) /* The VMA maybe contains guard regions. */
+ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
+
+ #define VM_LOCKED 0x00002000
+--- a/include/trace/events/mmflags.h
++++ b/include/trace/events/mmflags.h
+@@ -213,6 +213,7 @@ IF_HAVE_PG_ARCH_3(arch_3)
+ {VM_UFFD_MISSING, "uffd_missing" }, \
+ IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \
+ {VM_PFNMAP, "pfnmap" }, \
++ {VM_MAYBE_GUARD, "maybe_guard" }, \
+ {VM_UFFD_WP, "uffd_wp" }, \
+ {VM_LOCKED, "locked" }, \
+ {VM_IO, "io" }, \
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1494,6 +1494,10 @@ vma_needs_copy(struct vm_area_struct *ds
+ if (src_vma->anon_vma)
+ return true;
+
++ /* Guard regions have modified page tables that require copying. */
++ if (src_vma->vm_flags & VM_MAYBE_GUARD)
++ return true;
++
+ /*
+ * Don't copy ptes where a page fault will fill them correctly. Fork
+ * becomes much lighter when there are big shared or private readonly
+--- a/tools/testing/vma/vma_internal.h
++++ b/tools/testing/vma/vma_internal.h
+@@ -56,6 +56,7 @@ extern unsigned long dac_mmap_min_addr;
+ #define VM_MAYEXEC 0x00000040
+ #define VM_GROWSDOWN 0x00000100
+ #define VM_PFNMAP 0x00000400
++#define VM_MAYBE_GUARD 0x00000800
+ #define VM_LOCKED 0x00002000
+ #define VM_IO 0x00004000
+ #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
--- /dev/null
+From stable+bounces-247755-greg=kroah.com@vger.kernel.org Fri May 15 14:06:04 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:18 +0300
+Subject: mm: propagate VM_SOFTDIRTY on merge
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Vlastimil Babka <vbabka@suse.cz>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Pedro Falcato <pfalcato@suse.de>, Cyrill Gorcunov <gorcunov@gmail.com>, Jann Horn <jannh@google.com>, Liam Howlett <liam.howlett@oracle.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-10-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 6707915e030a3258868355f989b80140c1a45bbe upstream.
+
+Patch series "make VM_SOFTDIRTY a sticky VMA flag", v2.
+
+Currently we set VM_SOFTDIRTY when a new mapping is set up (whether by
+establishing a new VMA, or via merge) as implemented in __mmap_complete()
+and do_brk_flags().
+
+However, when performing a merge of existing mappings such as when
+performing mprotect(), we may lose the VM_SOFTDIRTY flag.
+
+Now we have the concept of making VMA flags 'sticky', that is that they
+both don't prevent merge and, importantly, are propagated to merged VMAs,
+this seems a sensible alternative to the existing special-casing of
+VM_SOFTDIRTY.
+
+We additionally add a self-test that demonstrates that this logic behaves
+as expected.
+
+This patch (of 2):
+
+Currently we set VM_SOFTDIRTY when a new mapping is set up (whether by
+establishing a new VMA, or via merge) as implemented in __mmap_complete()
+and do_brk_flags().
+
+However, when performing a merge of existing mappings such as when
+performing mprotect(), we may lose the VM_SOFTDIRTY flag.
+
+This is because currently we simply ignore VM_SOFTDIRTY for the purposes
+of merge, so one VMA may possess the flag and another not, and whichever
+happens to be the target VMA will be the one upon which the merge is
+performed which may or may not have VM_SOFTDIRTY set.
+
+Now we have the concept of 'sticky' VMA flags, let's make VM_SOFTDIRTY one
+which solves this issue.
+
+Additionally update VMA userland tests to propagate changes.
+
+[akpm@linux-foundation.org: update comments, per Lorenzo]
+ Link: https://lkml.kernel.org/r/0019e0b8-ee1e-4359-b5ee-94225cbe5588@lucifer.local
+Link: https://lkml.kernel.org/r/cover.1763399675.git.ljs@kernel.org
+Link: https://lkml.kernel.org/r/955478b5170715c895d1ef3b7f68e0cd77f76868.1763399675.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Acked-by: Andrey Vagin <avagin@gmail.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Fixes: 34228d473efe ("mm: ignore VM_SOFTDIRTY on VMA merging")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 15 +++++++--------
+ tools/testing/vma/vma_internal.h | 18 ++++++------------
+ 2 files changed, 13 insertions(+), 20 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -515,28 +515,27 @@ extern unsigned int kobjsize(const void
+ * possesses it but the other does not, the merged VMA should nonetheless have
+ * applied to it:
+ *
++ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
++ * references cleared via /proc/$pid/clear_refs, any merged VMA
++ * should be considered soft-dirty also as it operates at a VMA
++ * granularity.
++ *
+ * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
+ * mapped page tables may contain metadata not described by the
+ * VMA and thus any merged VMA may also contain this metadata,
+ * and thus we must make this flag sticky.
+ */
+-#define VM_STICKY VM_MAYBE_GUARD
++#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+ /*
+ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
+ * of these flags and the other not does not preclude a merge.
+ *
+- * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but
+- * dirty bit -- the caller should mark merged VMA as dirty. If
+- * dirty bit won't be excluded from comparison, we increase
+- * pressure on the memory system forcing the kernel to generate
+- * new VMAs when old one could be extended instead.
+- *
+ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
+ * 'sticky'. If any sticky flags exist in either VMA, we simply
+ * set all of them on the merged VMA.
+ */
+-#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
++#define VM_IGNORE_MERGE VM_STICKY
+
+ /*
+ * Flags which should result in page tables being copied on fork. These are
+--- a/tools/testing/vma/vma_internal.h
++++ b/tools/testing/vma/vma_internal.h
+@@ -122,28 +122,22 @@ extern unsigned long dac_mmap_min_addr;
+ * possesses it but the other does not, the merged VMA should nonetheless have
+ * applied to it:
+ *
+- * VM_MAYBE_GUARD - If a VMA may have guard regions in place it implies that
+- * mapped page tables may contain metadata not described by the
+- * VMA and thus any merged VMA may also contain this metadata,
+- * and thus we must make this flag sticky.
++ * VM_SOFTDIRTY - if a VMA is marked soft-dirty, that is has not had its
++ * references cleared via /proc/$pid/clear_refs, any merged VMA
++ * should be considered soft-dirty also as it operates at a VMA
++ * granularity.
+ */
+-#define VM_STICKY VM_MAYBE_GUARD
++#define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
+
+ /*
+ * VMA flags we ignore for the purposes of merge, i.e. one VMA possessing one
+ * of these flags and the other not does not preclude a merge.
+ *
+- * VM_SOFTDIRTY - Should not prevent from VMA merging, if we match the flags but
+- * dirty bit -- the caller should mark merged VMA as dirty. If
+- * dirty bit won't be excluded from comparison, we increase
+- * pressure on the memory system forcing the kernel to generate
+- * new VMAs when old one could be extended instead.
+- *
+ * VM_STICKY - When merging VMAs, VMA flags must match, unless they are
+ * 'sticky'. If any sticky flags exist in either VMA, we simply
+ * set all of them on the merged VMA.
+ */
+-#define VM_IGNORE_MERGE (VM_SOFTDIRTY | VM_STICKY)
++#define VM_IGNORE_MERGE VM_STICKY
+
+ /*
+ * Flags which should result in page tables being copied on fork. These are
--- /dev/null
+From stable+bounces-247753-greg=kroah.com@vger.kernel.org Fri May 15 14:05:57 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:16 +0300
+Subject: mm: set the VM_MAYBE_GUARD flag on guard region install
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Vlastimil Babka <vbabka@suse.cz>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Pedro Falcato <pfalcato@suse.de>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-8-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 49e14dabed7a294427588d4b315f57fbfcab9990 upstream.
+
+Now we have established the VM_MAYBE_GUARD flag and added the capacity to
+set it atomically, do so upon MADV_GUARD_INSTALL.
+
+The places where this flag is used currently and matter are:
+
+* VMA merge - performed under mmap/VMA write lock, therefore excluding
+ racing writes.
+
+* /proc/$pid/smaps - can race the write, however this isn't meaningful
+ as the flag write is performed at the point of the guard region being
+ established, and thus an smaps reader can't reasonably expect to avoid
+ races. Due to atomicity, a reader will observe either the flag being
+ set or not. Therefore consistency will be maintained.
+
+In all other cases the flag being set is irrelevant and atomicity
+guarantees other flags will be read correctly.
+
+Note that non-atomic updates of unrelated flags do not cause an issue with
+this flag being set atomically, as writes of other flags are performed
+under mmap/VMA write lock, and these atomic writes are performed under
+mmap/VMA read lock, which excludes the write, avoiding RMW races.
+
+Note that we do not encounter issues with KCSAN by adjusting this flag
+atomically, as we are only updating a single bit in the flag bitmap and
+therefore we do not need to annotate these changes.
+
+We intentionally set this flag in advance of actually updating the page
+tables, to ensure that any racing atomic read of this flag will only
+return false prior to page tables being updated, to allow for
+serialisation via page table locks.
+
+Note that we set vma->anon_vma for anonymous mappings. This is because
+the expectation for anonymous mappings is that an anon_vma is established
+should they possess any page table mappings. This is also consistent with
+what we were doing prior to this patch (unconditionally setting anon_vma
+on guard region installation).
+
+We also need to update retract_page_tables() to ensure that madvise(...,
+MADV_COLLAPSE) doesn't incorrectly collapse file-backed ranges that contain
+guard regions.
+
+This was previously guarded by anon_vma being set to catch MAP_PRIVATE
+cases, but the introduction of VM_MAYBE_GUARD necessitates that we check
+this flag instead.
+
+We utilise vma_flag_test_atomic() to do so - we first perform an
+optimistic check, then after the PTE page table lock is held, we can check
+again safely, as upon guard marker install the flag is set atomically
+prior to the page table lock being taken to actually apply it.
+
+So if the initial check fails either:
+
+* Page table retraction acquires page table lock prior to VM_MAYBE_GUARD
+ being set - guard marker installation will be blocked until page table
+ retraction is complete.
+
+OR:
+
+* Guard marker installation acquires page table lock after setting
+ VM_MAYBE_GUARD, which raced and didn't pick this up in the initial
+ optimistic check, blocking page table retraction until the guard regions
+ are installed - the second VM_MAYBE_GUARD check will prevent page table
+ retraction.
+
+Either way we're safe.
+
+We refactor the retraction checks into a single
+file_backed_vma_is_retractable(), there doesn't seem to be any reason that
+the checks were separated as before.
+
+Note that VM_MAYBE_GUARD being set atomically remains correct as
+vma_needs_copy() is invoked with the mmap and VMA write locks held,
+excluding any race with madvise_guard_install().
+
+Link: https://lkml.kernel.org/r/e9e9ce95b6ac17497de7f60fc110c7dd9e489e8d.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand (Red Hat) <david@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Pedro Falcato <pfalcato@suse.de>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/khugepaged.c | 71 +++++++++++++++++++++++++++++++++++++-------------------
+ mm/madvise.c | 22 +++++++++++------
+ 2 files changed, 61 insertions(+), 32 deletions(-)
+
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -1715,6 +1715,43 @@ drop_folio:
+ return result;
+ }
+
++/* Can we retract page tables for this file-backed VMA? */
++static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)
++{
++ /*
++ * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
++ * got written to. These VMAs are likely not worth removing
++ * page tables from, as PMD-mapping is likely to be split later.
++ */
++ if (READ_ONCE(vma->anon_vma))
++ return false;
++
++ /*
++ * When a vma is registered with uffd-wp, we cannot recycle
++ * the page table because there may be pte markers installed.
++ * Other vmas can still have the same file mapped hugely, but
++ * skip this one: it will always be mapped in small page size
++ * for uffd-wp registered ranges.
++ */
++ if (userfaultfd_wp(vma))
++ return false;
++
++ /*
++ * If the VMA contains guard regions then we can't collapse it.
++ *
++ * This is set atomically on guard marker installation under mmap/VMA
++ * read lock, and here we may not hold any VMA or mmap lock at all.
++ *
++ * This is therefore serialised on the PTE page table lock, which is
++ * obtained on guard region installation after the flag is set, so this
++ * check being performed under this lock excludes races.
++ */
++ if (vma_flag_test_atomic(vma, VM_MAYBE_GUARD_BIT))
++ return false;
++
++ return true;
++}
++
+ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
+ {
+ struct vm_area_struct *vma;
+@@ -1729,14 +1766,6 @@ static void retract_page_tables(struct a
+ spinlock_t *ptl;
+ bool success = false;
+
+- /*
+- * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+- * got written to. These VMAs are likely not worth removing
+- * page tables from, as PMD-mapping is likely to be split later.
+- */
+- if (READ_ONCE(vma->anon_vma))
+- continue;
+-
+ addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ if (addr & ~HPAGE_PMD_MASK ||
+ vma->vm_end < addr + HPAGE_PMD_SIZE)
+@@ -1748,14 +1777,8 @@ static void retract_page_tables(struct a
+
+ if (hpage_collapse_test_exit(mm))
+ continue;
+- /*
+- * When a vma is registered with uffd-wp, we cannot recycle
+- * the page table because there may be pte markers installed.
+- * Other vmas can still have the same file mapped hugely, but
+- * skip this one: it will always be mapped in small page size
+- * for uffd-wp registered ranges.
+- */
+- if (userfaultfd_wp(vma))
++
++ if (!file_backed_vma_is_retractable(vma))
+ continue;
+
+ /* PTEs were notified when unmapped; but now for the PMD? */
+@@ -1782,15 +1805,15 @@ static void retract_page_tables(struct a
+ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+
+ /*
+- * Huge page lock is still held, so normally the page table
+- * must remain empty; and we have already skipped anon_vma
+- * and userfaultfd_wp() vmas. But since the mmap_lock is not
+- * held, it is still possible for a racing userfaultfd_ioctl()
+- * to have inserted ptes or markers. Now that we hold ptlock,
+- * repeating the anon_vma check protects from one category,
+- * and repeating the userfaultfd_wp() check from another.
++ * Huge page lock is still held, so normally the page table must
++ * remain empty; and we have already skipped anon_vma and
++ * userfaultfd_wp() vmas. But since the mmap_lock is not held,
++ * it is still possible for a racing userfaultfd_ioctl() or
++ * madvise() to have inserted ptes or markers. Now that we hold
++ * ptlock, repeating the retractable checks protects us from
++ * races against the prior checks.
+ */
+- if (likely(!vma->anon_vma && !userfaultfd_wp(vma))) {
++ if (likely(file_backed_vma_is_retractable(vma))) {
+ pgt_pmd = pmdp_collapse_flush(vma, addr, pmd);
+ pmdp_get_lockless_sync();
+ success = true;
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -1141,15 +1141,21 @@ static long madvise_guard_install(struct
+ return -EINVAL;
+
+ /*
+- * If we install guard markers, then the range is no longer
+- * empty from a page table perspective and therefore it's
+- * appropriate to have an anon_vma.
+- *
+- * This ensures that on fork, we copy page tables correctly.
++ * Set atomically under read lock. All pertinent readers will need to
++ * acquire an mmap/VMA write lock to read it. All remaining readers may
++ * or may not see the flag set, but we don't care.
+ */
+- err = anon_vma_prepare(vma);
+- if (err)
+- return err;
++ vma_flag_set_atomic(vma, VM_MAYBE_GUARD_BIT);
++
++ /*
++ * If anonymous and we are establishing page tables the VMA ought to
++ * have an anon_vma associated with it.
++ */
++ if (vma_is_anonymous(vma)) {
++ err = anon_vma_prepare(vma);
++ if (err)
++ return err;
++ }
+
+ /*
+ * Optimistically try to install the guard marker pages first. If any
--- /dev/null
+From stable+bounces-247750-greg=kroah.com@vger.kernel.org Fri May 15 14:05:39 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:13 +0300
+Subject: mm: update vma_modify_flags() to handle residual flags, document
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, Pedro Falcato <pfalcato@suse.de>, Vlastimil Babka <vbabka@suse.cz>, Baolin Wang <baolin.wang@linux.alibaba.com>, Barry Song <baohua@kernel.org>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Dev Jain <dev.jain@arm.com>, Jann Horn <jannh@google.com>, Jonathan Corbet <corbet@lwn.net>, Lance Yang <lance.yang@linux.dev>, Liam Howlett <liam.howlett@oracle.com>, "Masami Hiramatsu (Google)" <mhiramat@kernel.org>, Mathieu Desnoyers <mathieu.desnoyers@efficios.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Nico Pache <npache@redhat.com>, Ryan Roberts <ryan.roberts@arm.com>, Steven Rostedt <rostedt@goodmis.org>, Suren Baghdasaryan <surenb@google.com>, Zi Yan <ziy@nvidia.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-5-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 9119d6c2095bb20292cb9812dd70d37f17e3bd37 upstream.
+
+The vma_modify_*() family of functions each either perform splits, a merge
+or no changes at all in preparation for the requested modification to
+occur.
+
+When doing so for a VMA flags change, we currently don't account for any
+flags which may remain (for instance, VM_SOFTDIRTY) despite the requested
+change in the case that a merge succeeded.
+
+This is made more important by subsequent patches which will introduce the
+concept of sticky VMA flags which rely on this behaviour.
+
+This patch fixes this by passing the VMA flags parameter as a pointer and
+updating it accordingly on merge and updating callers to accommodate for
+this.
+
+Additionally, while we are here, we add kdocs for each of the
+vma_modify_*() functions, as the fact that the requested modification is
+not performed is confusing so it is useful to make this abundantly clear.
+
+We also update the VMA userland tests to account for this change.
+
+Link: https://lkml.kernel.org/r/23b5b549b0eaefb2922625626e58c2a352f3e93c.1763460113.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand (Red Hat) <david@kernel.org>
+Cc: Dev Jain <dev.jain@arm.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Lance Yang <lance.yang@linux.dev>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Nico Pache <npache@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/madvise.c | 2
+ mm/mlock.c | 2
+ mm/mprotect.c | 2
+ mm/mseal.c | 7 +-
+ mm/vma.c | 56 ++++++++++---------
+ mm/vma.h | 138 +++++++++++++++++++++++++++++++++++-------------
+ tools/testing/vma/vma.c | 3 -
+ 7 files changed, 142 insertions(+), 68 deletions(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -167,7 +167,7 @@ static int madvise_update_vma(vm_flags_t
+ range->start, range->end, anon_name);
+ else
+ vma = vma_modify_flags(&vmi, madv_behavior->prev, vma,
+- range->start, range->end, new_flags);
++ range->start, range->end, &new_flags);
+
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -480,7 +480,7 @@ static int mlock_fixup(struct vma_iterat
+ */
+ goto out;
+
+- vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags);
++ vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out;
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -813,7 +813,7 @@ mprotect_fixup(struct vma_iterator *vmi,
+ newflags &= ~VM_ACCOUNT;
+ }
+
+- vma = vma_modify_flags(vmi, *pprev, vma, start, end, newflags);
++ vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags);
+ if (IS_ERR(vma)) {
+ error = PTR_ERR(vma);
+ goto fail;
+--- a/mm/mseal.c
++++ b/mm/mseal.c
+@@ -69,9 +69,10 @@ static int mseal_apply(struct mm_struct
+ const unsigned long curr_end = MIN(vma->vm_end, end);
+
+ if (!(vma->vm_flags & VM_SEALED)) {
+- vma = vma_modify_flags(&vmi, prev, vma,
+- curr_start, curr_end,
+- vma->vm_flags | VM_SEALED);
++ vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;
++
++ vma = vma_modify_flags(&vmi, prev, vma, curr_start,
++ curr_end, &vm_flags);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+ vm_flags_set(vma, VM_SEALED);
+--- a/mm/vma.c
++++ b/mm/vma.c
+@@ -1676,25 +1676,35 @@ static struct vm_area_struct *vma_modify
+ return vma;
+ }
+
+-struct vm_area_struct *vma_modify_flags(
+- struct vma_iterator *vmi, struct vm_area_struct *prev,
+- struct vm_area_struct *vma, unsigned long start, unsigned long end,
+- vm_flags_t vm_flags)
++struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end,
++ vm_flags_t *vm_flags_ptr)
+ {
+ VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
++ const vm_flags_t vm_flags = *vm_flags_ptr;
++ struct vm_area_struct *ret;
+
+ vmg.vm_flags = vm_flags;
+
+- return vma_modify(&vmg);
++ ret = vma_modify(&vmg);
++ if (IS_ERR(ret))
++ return ret;
++
++ /*
++ * For a merge to succeed, the flags must match those requested. For
++ * flags which do not obey typical merge rules (i.e. do not need to
++ * match), we must let the caller know about them.
++ */
++ if (vmg.state == VMA_MERGE_SUCCESS)
++ *vm_flags_ptr = ret->vm_flags;
++ return ret;
+ }
+
+-struct vm_area_struct
+-*vma_modify_name(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
+- unsigned long start,
+- unsigned long end,
+- struct anon_vma_name *new_name)
++struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end,
++ struct anon_vma_name *new_name)
+ {
+ VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+
+@@ -1703,12 +1713,10 @@ struct vm_area_struct
+ return vma_modify(&vmg);
+ }
+
+-struct vm_area_struct
+-*vma_modify_policy(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
+- unsigned long start, unsigned long end,
+- struct mempolicy *new_pol)
++struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end,
++ struct mempolicy *new_pol)
+ {
+ VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+
+@@ -1717,14 +1725,10 @@ struct vm_area_struct
+ return vma_modify(&vmg);
+ }
+
+-struct vm_area_struct
+-*vma_modify_flags_uffd(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
+- unsigned long start, unsigned long end,
+- vm_flags_t vm_flags,
+- struct vm_userfaultfd_ctx new_ctx,
+- bool give_up_on_oom)
++struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end, vm_flags_t vm_flags,
++ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
+ {
+ VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
+
+--- a/mm/vma.h
++++ b/mm/vma.h
+@@ -266,47 +266,115 @@ void remove_vma(struct vm_area_struct *v
+ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
+ struct vm_area_struct *prev, struct vm_area_struct *next);
+
+-/* We are about to modify the VMA's flags. */
+-__must_check struct vm_area_struct
+-*vma_modify_flags(struct vma_iterator *vmi,
++/**
+ * vma_modify_flags() - Perform any necessary split/merge in preparation for
++ * setting VMA flags to *@vm_flags in the range @start to @end contained within
++ * @vma.
++ * @vmi: Valid VMA iterator positioned at @vma.
++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
++ * @vma: The VMA containing the range @start to @end to be updated.
++ * @start: The start of the range to update. May be offset within @vma.
++ * @end: The exclusive end of the range to update, may be offset within @vma.
++ * @vm_flags_ptr: A pointer to the VMA flags that the @start to @end range is
++ * about to be set to. On merge, this will be updated to include any additional
++ * flags which remain in place.
++ *
++ * IMPORTANT: The actual modification being requested here is NOT applied,
++ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
++ * and the caller is expected to perform the actual modification.
++ *
++ * In order to account for VMA flags which may persist (e.g. soft-dirty), the
++ * @vm_flags_ptr parameter points to the requested flags which are then updated
++ * so the caller, should they overwrite any existing flags, correctly retains
++ * these.
++ *
++ * Returns: A VMA which contains the range @start to @end ready to have its
++ * flags altered to *@vm_flags.
++ */
++__must_check struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
+ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+- vm_flags_t vm_flags);
++ vm_flags_t *vm_flags_ptr);
+
+-/* We are about to modify the VMA's anon_name. */
+-__must_check struct vm_area_struct
+-*vma_modify_name(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
+- unsigned long start,
+- unsigned long end,
+- struct anon_vma_name *new_name);
+-
+-/* We are about to modify the VMA's memory policy. */
+-__must_check struct vm_area_struct
+-*vma_modify_policy(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
++/**
+ * vma_modify_name() - Perform any necessary split/merge in preparation for
++ * setting anonymous VMA name to @new_name in the range @start to @end contained
++ * within @vma.
++ * @vmi: Valid VMA iterator positioned at @vma.
++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
++ * @vma: The VMA containing the range @start to @end to be updated.
++ * @start: The start of the range to update. May be offset within @vma.
++ * @end: The exclusive end of the range to update, may be offset within @vma.
++ * @new_name: The anonymous VMA name that the @start to @end range is about to
++ * be set to.
++ *
++ * IMPORTANT: The actual modification being requested here is NOT applied,
++ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
++ * and the caller is expected to perform the actual modification.
++ *
++ * Returns: A VMA which contains the range @start to @end ready to have its
++ * anonymous VMA name changed to @new_name.
++ */
++__must_check struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end,
++ struct anon_vma_name *new_name);
++
++/**
++ * vma_modify_policy() - Perform any necessary split/merge in preparation for
++ * setting NUMA policy to @new_pol in the range @start to @end contained
++ * within @vma.
++ * @vmi: Valid VMA iterator positioned at @vma.
++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
++ * @vma: The VMA containing the range @start to @end to be updated.
++ * @start: The start of the range to update. May be offset within @vma.
++ * @end: The exclusive end of the range to update, may be offset within @vma.
++ * @new_pol: The NUMA policy that the @start to @end range is about to be set
++ * to.
++ *
++ * IMPORTANT: The actual modification being requested here is NOT applied,
++ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
++ * and the caller is expected to perform the actual modification.
++ *
++ * Returns: A VMA which contains the range @start to @end ready to have its
++ * NUMA policy changed to @new_pol.
++ */
++__must_check struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct mempolicy *new_pol);
+
+-/* We are about to modify the VMA's flags and/or uffd context. */
+-__must_check struct vm_area_struct
+-*vma_modify_flags_uffd(struct vma_iterator *vmi,
+- struct vm_area_struct *prev,
+- struct vm_area_struct *vma,
+- unsigned long start, unsigned long end,
+- vm_flags_t vm_flags,
+- struct vm_userfaultfd_ctx new_ctx,
+- bool give_up_on_oom);
+-
+-__must_check struct vm_area_struct
+-*vma_merge_new_range(struct vma_merge_struct *vmg);
+-
+-__must_check struct vm_area_struct
+-*vma_merge_extend(struct vma_iterator *vmi,
+- struct vm_area_struct *vma,
+- unsigned long delta);
++/**
++ * vma_modify_flags_uffd() - Perform any necessary split/merge in preparation for
++ * setting VMA flags to @vm_flags and UFFD context to @new_ctx in the range
++ * @start to @end contained within @vma.
++ * @vmi: Valid VMA iterator positioned at @vma.
++ * @prev: The VMA immediately prior to @vma or NULL if @vma is the first.
++ * @vma: The VMA containing the range @start to @end to be updated.
++ * @start: The start of the range to update. May be offset within @vma.
++ * @end: The exclusive end of the range to update, may be offset within @vma.
++ * @vm_flags: The VMA flags that the @start to @end range is about to be set to.
++ * @new_ctx: The userfaultfd context that the @start to @end range is about to
++ * be set to.
++ * @give_up_on_oom: If an out of memory condition occurs on merge, simply give
++ * up on it and treat the merge as best-effort.
++ *
++ * IMPORTANT: The actual modification being requested here is NOT applied,
++ * rather the VMA is perhaps split, perhaps merged to accommodate the change,
++ * and the caller is expected to perform the actual modification.
++ *
++ * Returns: A VMA which contains the range @start to @end ready to have its VMA
++ * flags changed to @vm_flags and its userfaultfd context changed to @new_ctx.
++ */
++__must_check struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
++ struct vm_area_struct *prev, struct vm_area_struct *vma,
++ unsigned long start, unsigned long end, vm_flags_t vm_flags,
++ struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom);
++
++__must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
++
++__must_check struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
++ struct vm_area_struct *vma, unsigned long delta);
+
+ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);
+
+--- a/tools/testing/vma/vma.c
++++ b/tools/testing/vma/vma.c
+@@ -339,6 +339,7 @@ static bool test_simple_modify(void)
+ struct mm_struct mm = {};
+ struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags);
+ VMA_ITERATOR(vmi, &mm, 0x1000);
++ vm_flags_t flags = VM_READ | VM_MAYREAD;
+
+ ASSERT_FALSE(attach_vma(&mm, init_vma));
+
+@@ -347,7 +348,7 @@ static bool test_simple_modify(void)
+ * performs the merge/split only.
+ */
+ vma = vma_modify_flags(&vmi, init_vma, init_vma,
+- 0x1000, 0x2000, VM_READ | VM_MAYREAD);
++ 0x1000, 0x2000, &flags);
+ ASSERT_NE(vma, NULL);
+ /* We modify the provided VMA, and on split allocate new VMAs. */
+ ASSERT_EQ(vma, init_vma);
drivers-hv-vmbus-improve-the-logic-of-reserving-fb_mmio-on-gen2-vms.patch
firmware-samsung-acpm-fix-cross-thread-rx-length-corruption.patch
sctp-disable-bh-before-calling-udp_tunnel_xmit_skb.patch
+mm-introduce-vm_maybe_guard-and-make-visible-in-proc-pid-smaps.patch
+mm-add-atomic-vma-flags-and-set-vm_maybe_guard-as-such.patch
+mm-update-vma_modify_flags-to-handle-residual-flags-document.patch
+mm-implement-sticky-vma-flags.patch
+mm-introduce-copy-on-fork-vmas-and-make-vm_maybe_guard-one.patch
+mm-set-the-vm_maybe_guard-flag-on-guard-region-install.patch
+mm-propagate-vm_softdirty-on-merge.patch
+testing-selftests-mm-add-soft-dirty-merge-self-test.patch
--- /dev/null
+From stable+bounces-247756-greg=kroah.com@vger.kernel.org Fri May 15 14:06:06 2026
+From: Ahmed Elaidy <elaidya225@gmail.com>
+Date: Fri, 15 May 2026 15:42:19 +0300
+Subject: testing/selftests/mm: add soft-dirty merge self-test
+To: stable@vger.kernel.org
+Cc: linux-mm@kvack.org, akpm@linux-foundation.org, ljs@kernel.org, avagin@gmail.com, Lorenzo Stoakes <lorenzo.stoakes@oracle.com>, "David Hildenbrand (Red Hat)" <david@kernel.org>, Jann Horn <jannh@google.com>, Liam Howlett <liam.howlett@oracle.com>, Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>, Pedro Falcato <pfalcato@suse.de>, Suren Baghdasaryan <surenb@google.com>, Vlastimil Babka <vbabka@suse.cz>, Cyrill Gorcunov <gorcunov@gmail.com>, Ahmed Elaidy <elaidya225@gmail.com>
+Message-ID: <20260515124218.151966-11-elaidya225@gmail.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit c7ba92bcfea34f6b4afc744c3b65c8f7420fefe0 upstream.
+
+Assert that we correctly merge VMAs containing VM_SOFTDIRTY flags now that
+we correctly handle these as sticky.
+
+In order to do so, we have to account for the fact the pagemap interface
+checks soft dirty PTEs and additionally that newly merged VMAs are marked
+VM_SOFTDIRTY.
+
+We do this by using unfaulted anon VMAs, establishing one and clearing
+references on that one, before establishing another and merging the two
+before checking that soft-dirty is propagated as expected.
+
+We check that this functions correctly with mremap() and mprotect() as
+sample cases, because VMA merge of adjacent newly mapped VMAs will
+automatically be made soft-dirty due to existing logic which does so.
+
+We are therefore exercising other means of merging VMAs.
+
+Link: https://lkml.kernel.org/r/d5a0f735783fb4f30a604f570ede02ccc5e29be9.1763399675.git.ljs@kernel.org
+Signed-off-by: Lorenzo Stoakes <ljs@kernel.org>
+Cc: Andrey Vagin <avagin@gmail.com>
+Cc: David Hildenbrand (Red Hat) <david@kernel.org>
+Cc: Jann Horn <jannh@google.com>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Pedro Falcato <pfalcato@suse.de>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Cyrill Gorcunov <gorcunov@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ahmed Elaidy <elaidya225@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/soft-dirty.c | 127 +++++++++++++++++++++++++++++++-
+ 1 file changed, 126 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/mm/soft-dirty.c
++++ b/tools/testing/selftests/mm/soft-dirty.c
+@@ -184,6 +184,130 @@ static void test_mprotect(int pagemap_fd
+ close(test_fd);
+ }
+
++static void test_merge(int pagemap_fd, int pagesize)
++{
++ char *reserved, *map, *map2;
++
++ /*
++ * Reserve space for tests:
++ *
++ * ---padding to ---
++ * | avoid adj. |
++ * v merge v
++ * |---|---|---|---|---|
++ * | | 1 | 2 | 3 | |
++ * |---|---|---|---|---|
++ */
++ reserved = mmap(NULL, 5 * pagesize, PROT_NONE,
++ MAP_ANON | MAP_PRIVATE, -1, 0);
++ if (reserved == MAP_FAILED)
++ ksft_exit_fail_msg("mmap failed\n");
++ munmap(reserved, 4 * pagesize);
++
++ /*
++ * Establish initial VMA:
++ *
++ * S/D
++ * |---|---|---|---|---|
++ * | | 1 | | | |
++ * |---|---|---|---|---|
++ */
++ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
++ if (map == MAP_FAILED)
++ ksft_exit_fail_msg("mmap failed\n");
++
++ /* This will clear VM_SOFTDIRTY too. */
++ clear_softdirty();
++
++ /*
++ * Now place a new mapping which will be marked VM_SOFTDIRTY. Away from
++ * map:
++ *
++ * - S/D
++ * |---|---|---|---|---|
++ * | | 1 | | 2 | |
++ * |---|---|---|---|---|
++ */
++ map2 = mmap(&reserved[3 * pagesize], pagesize, PROT_READ | PROT_WRITE,
++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
++ if (map2 == MAP_FAILED)
++ ksft_exit_fail_msg("mmap failed\n");
++
++ /*
++ * Now remap it immediately adjacent to map, if the merge correctly
++ * propagates VM_SOFTDIRTY, we should then observe the VMA as a whole
++ * being marked soft-dirty:
++ *
++ * merge
++ * S/D
++ * |---|-------|---|---|
++ * | | 1 | | |
++ * |---|-------|---|---|
++ */
++ map2 = mremap(map2, pagesize, pagesize, MREMAP_FIXED | MREMAP_MAYMOVE,
++ &reserved[2 * pagesize]);
++ if (map2 == MAP_FAILED)
++ ksft_exit_fail_msg("mremap failed\n");
++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
++ "Test %s-anon soft-dirty after remap merge 1st pg\n",
++ __func__);
++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
++ "Test %s-anon soft-dirty after remap merge 2nd pg\n",
++ __func__);
++
++ munmap(map, 2 * pagesize);
++
++ /*
++ * Now establish another VMA:
++ *
++ * S/D
++ * |---|---|---|---|---|
++ * | | 1 | | | |
++ * |---|---|---|---|---|
++ */
++ map = mmap(&reserved[pagesize], pagesize, PROT_READ | PROT_WRITE,
++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
++ if (map == MAP_FAILED)
++ ksft_exit_fail_msg("mmap failed\n");
++
++ /* Clear VM_SOFTDIRTY... */
++ clear_softdirty();
++ /* ...and establish incompatible adjacent VMA:
++ *
++ * - S/D
++ * |---|---|---|---|---|
++ * | | 1 | 2 | | |
++ * |---|---|---|---|---|
++ */
++ map2 = mmap(&reserved[2 * pagesize], pagesize,
++ PROT_READ | PROT_WRITE | PROT_EXEC,
++ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
++ if (map2 == MAP_FAILED)
++ ksft_exit_fail_msg("mmap failed\n");
++
++ /*
++ * Now mprotect() VMA 1 so it's compatible with 2 and therefore merges:
++ *
++ * merge
++ * S/D
++ * |---|-------|---|---|
++ * | | 1 | | |
++ * |---|-------|---|---|
++ */
++ if (mprotect(map, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC))
++ ksft_exit_fail_msg("mprotect failed\n");
++
++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
++ "Test %s-anon soft-dirty after mprotect merge 1st pg\n",
++ __func__);
++ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
++ "Test %s-anon soft-dirty after mprotect merge 2nd pg\n",
++ __func__);
++
++ munmap(map, 2 * pagesize);
++}
++
+ static void test_mprotect_anon(int pagemap_fd, int pagesize)
+ {
+ test_mprotect(pagemap_fd, pagesize, true);
+@@ -204,7 +328,7 @@ int main(int argc, char **argv)
+ if (!softdirty_supported())
+ ksft_exit_skip("soft-dirty is not support\n");
+
+- ksft_set_plan(15);
++ ksft_set_plan(19);
+ pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+@@ -216,6 +340,7 @@ int main(int argc, char **argv)
+ test_hugepage(pagemap_fd, pagesize);
+ test_mprotect_anon(pagemap_fd, pagesize);
+ test_mprotect_file(pagemap_fd, pagesize);
++ test_merge(pagemap_fd, pagesize);
+
+ close(pagemap_fd);
+