]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Aug 2025 13:28:09 +0000 (15:28 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 Aug 2025 13:28:09 +0000 (15:28 +0200)
added patches:
mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch
mm-reinstate-ability-to-map-write-sealed-memfd-mappings-read-only.patch
mm-update-memfd-seal-write-check-to-include-f_seal_write.patch
selftests-memfd-add-test-for-mapping-write-sealed-memfd-read-only.patch

queue-5.15/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch [new file with mode: 0644]
queue-5.15/mm-reinstate-ability-to-map-write-sealed-memfd-mappings-read-only.patch [new file with mode: 0644]
queue-5.15/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch [new file with mode: 0644]
queue-5.15/selftests-memfd-add-test-for-mapping-write-sealed-memfd-read-only.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch b/queue-5.15/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch
new file mode 100644 (file)
index 0000000..5c0c6dd
--- /dev/null
@@ -0,0 +1,195 @@
+From stable+bounces-165169-greg=kroah.com@vger.kernel.org Wed Jul 30 03:54:58 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 18:53:30 -0700
+Subject: mm: drop the assumption that VM_SHARED always implies writable
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Alexander Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,  Andrew Morton <akpm@linux-foundation.org>, David Hildenbrand <david@redhat.com>,  "Liam R. Howlett" <Liam.Howlett@oracle.com>, Vlastimil Babka <vbabka@suse.cz>,  Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>, Michal Hocko <mhocko@suse.com>,  Kees Cook <kees@kernel.org>, Ingo Molnar <mingo@redhat.com>,  Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>,  Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>,  Steven Rostedt <rostedt@goodmis.org>, Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,  Valentin Schneider <vschneid@redhat.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Jann Horn <jannh@google.com>,  Pedro Falcato <pfalcato@suse.de>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com,  Lorenzo Stoakes <lstoakes@gmail.com>, Andy Lutomirski <luto@kernel.org>, Hugh Dickins <hughd@google.com>,  Mike Kravetz <mike.kravetz@oracle.com>, Muchun Song <muchun.song@linux.dev>,  linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,  linux-mm@kvack.org
+Message-ID: <20250730015337.31730-2-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit e8e17ee90eaf650c855adb0a3e5e965fd6692ff1 ]
+
+Patch series "permit write-sealed memfd read-only shared mappings", v4.
+
+The man page for fcntl() describing memfd file seals states the following
+about F_SEAL_WRITE:-
+
+    Furthermore, trying to create new shared, writable memory-mappings via
+    mmap(2) will also fail with EPERM.
+
+With emphasis on 'writable'.  It turns out in fact that currently the
+kernel simply disallows all new shared memory mappings for a memfd with
+F_SEAL_WRITE applied, rendering this documentation inaccurate.
+
+This matters because users are therefore unable to obtain a shared mapping
+to a memfd after write sealing altogether, which limits their usefulness.
+This was reported in the discussion thread [1] originating from a bug
+report [2].
+
+This is a product of both using the struct address_space->i_mmap_writable
+atomic counter to determine whether writing may be permitted, and the
+kernel adjusting this counter when any VM_SHARED mapping is performed and
+more generally implicitly assuming VM_SHARED implies writable.
+
+It seems sensible that we should only update this mapping if VM_MAYWRITE
+is specified, i.e.  whether it is possible that this mapping could at any
+point be written to.
+
+If we do so then all we need to do to permit write seals to function as
+documented is to clear VM_MAYWRITE when mapping read-only.  It turns out
+this functionality already exists for F_SEAL_FUTURE_WRITE - we can
+therefore simply adapt this logic to do the same for F_SEAL_WRITE.
+
+We then hit a chicken and egg situation in mmap_region() where the check
+for VM_MAYWRITE occurs before we are able to clear this flag.  To work
+around this, perform this check after we invoke call_mmap(), with careful
+consideration of error paths.
+
+Thanks to Andy Lutomirski for the suggestion!
+
+[1]:https://lore.kernel.org/all/20230324133646.16101dfa666f253c4715d965@linux-foundation.org/
+[2]:https://bugzilla.kernel.org/show_bug.cgi?id=217238
+
+This patch (of 3):
+
+There is a general assumption that VMAs with the VM_SHARED flag set are
+writable.  If the VM_MAYWRITE flag is not set, then this is simply not the
+case.
+
+Update those checks which affect the struct address_space->i_mmap_writable
+field to explicitly test for this by introducing
+[vma_]is_shared_maywrite() helper functions.
+
+This remains entirely conservative, as the lack of VM_MAYWRITE guarantees
+that the VMA cannot be written to.
+
+Link: https://lkml.kernel.org/r/cover.1697116581.git.lstoakes@gmail.com
+Link: https://lkml.kernel.org/r/d978aefefa83ec42d18dfa964ad180dbcde34795.1697116581.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Suggested-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+[isaacmanjarres: resolved merge conflicts due to
+refactoring that happened in upstream commit
+5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour")]
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fs.h |    4 ++--
+ include/linux/mm.h |   11 +++++++++++
+ kernel/fork.c      |    2 +-
+ mm/filemap.c       |    2 +-
+ mm/madvise.c       |    2 +-
+ mm/mmap.c          |    6 +++---
+ 6 files changed, 19 insertions(+), 8 deletions(-)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -444,7 +444,7 @@ int pagecache_write_end(struct file *, s
+  *   It is also used to block modification of page cache contents through
+  *   memory mappings.
+  * @gfp_mask: Memory allocation flags to use for allocating pages.
+- * @i_mmap_writable: Number of VM_SHARED mappings.
++ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
+  * @nr_thps: Number of THPs in the pagecache (non-shmem only).
+  * @i_mmap: Tree of private and shared mappings.
+  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+@@ -542,7 +542,7 @@ static inline int mapping_mapped(struct
+ /*
+  * Might pages of this file have been modified in userspace?
+- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
++ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
+  * marks vma as VM_SHARED if it is shared, and the file was opened for
+  * writing i.e. vma may be mprotected writable even if now readonly.
+  *
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -706,6 +706,17 @@ static inline bool vma_is_accessible(str
+       return vma->vm_flags & VM_ACCESS_FLAGS;
+ }
++static inline bool is_shared_maywrite(vm_flags_t vm_flags)
++{
++      return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
++              (VM_SHARED | VM_MAYWRITE);
++}
++
++static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
++{
++      return is_shared_maywrite(vma->vm_flags);
++}
++
+ #ifdef CONFIG_SHMEM
+ /*
+  * The vma_is_shmem is not inline because it is used only by slow
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -580,7 +580,7 @@ static __latent_entropy int dup_mmap(str
+                       get_file(file);
+                       i_mmap_lock_write(mapping);
+-                      if (tmp->vm_flags & VM_SHARED)
++                      if (vma_is_shared_maywrite(tmp))
+                               mapping_allow_writable(mapping);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after mpnt */
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3408,7 +3408,7 @@ int generic_file_mmap(struct file *file,
+  */
+ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+-      if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
++      if (vma_is_shared_maywrite(vma))
+               return -EINVAL;
+       return generic_file_mmap(file, vma);
+ }
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -908,7 +908,7 @@ static long madvise_remove(struct vm_are
+                       return -EINVAL;
+       }
+-      if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
++      if (!vma_is_shared_maywrite(vma))
+               return -EACCES;
+       offset = (loff_t)(start - vma->vm_start)
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -148,7 +148,7 @@ void vma_set_page_prot(struct vm_area_st
+ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
+               struct file *file, struct address_space *mapping)
+ {
+-      if (vma->vm_flags & VM_SHARED)
++      if (vma_is_shared_maywrite(vma))
+               mapping_unmap_writable(mapping);
+       flush_dcache_mmap_lock(mapping);
+@@ -664,7 +664,7 @@ static void __vma_link_file(struct vm_ar
+       if (file) {
+               struct address_space *mapping = file->f_mapping;
+-              if (vma->vm_flags & VM_SHARED)
++              if (vma_is_shared_maywrite(vma))
+                       mapping_allow_writable(mapping);
+               flush_dcache_mmap_lock(mapping);
+@@ -2918,7 +2918,7 @@ unsigned long mmap_region(struct file *f
+               return -EINVAL;
+       /* Map writable and ensure this isn't a sealed memfd. */
+-      if (file && (vm_flags & VM_SHARED)) {
++      if (file && is_shared_maywrite(vm_flags)) {
+               int error = mapping_map_writable(file->f_mapping);
+               if (error)
diff --git a/queue-5.15/mm-reinstate-ability-to-map-write-sealed-memfd-mappings-read-only.patch b/queue-5.15/mm-reinstate-ability-to-map-write-sealed-memfd-mappings-read-only.patch
new file mode 100644 (file)
index 0000000..ed05453
--- /dev/null
@@ -0,0 +1,234 @@
+From stable+bounces-165171-greg=kroah.com@vger.kernel.org Wed Jul 30 03:55:14 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 18:53:32 -0700
+Subject: mm: reinstate ability to map write-sealed memfd mappings read-only
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Hugh Dickins <hughd@google.com>, Baolin Wang <baolin.wang@linux.alibaba.com>,  Andrew Morton <akpm@linux-foundation.org>, David Hildenbrand <david@redhat.com>,  "Liam R. Howlett" <Liam.Howlett@oracle.com>, Vlastimil Babka <vbabka@suse.cz>,  Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>, Michal Hocko <mhocko@suse.com>,  Jann Horn <jannh@google.com>, Pedro Falcato <pfalcato@suse.de>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com,  Julian Orth <ju.orth@gmail.com>, "Liam R. Howlett" <Liam.Howlett@Oracle.com>,  Linus Torvalds <torvalds@linux-foundation.org>, Shuah Khan <shuah@kernel.org>, linux-mm@kvack.org,  linux-kernel@vger.kernel.org
+Message-ID: <20250730015337.31730-4-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+[ Upstream commit 8ec396d05d1b737c87311fb7311f753b02c2a6b1 ]
+
+Patch series "mm: reinstate ability to map write-sealed memfd mappings
+read-only".
+
+In commit 158978945f31 ("mm: perform the mapping_map_writable() check
+after call_mmap()") (and preceding changes in the same series) it became
+possible to mmap() F_SEAL_WRITE sealed memfd mappings read-only.
+
+Commit 5de195060b2e ("mm: resolve faulty mmap_region() error path
+behaviour") unintentionally undid this logic by moving the
+mapping_map_writable() check before the shmem_mmap() hook is invoked,
+thereby regressing this change.
+
+This series reworks how we both permit write-sealed mappings being mapped
+read-only and disallow mprotect() from undoing the write-seal, fixing this
+regression.
+
+We also add a regression test to ensure that we do not accidentally
+regress this in future.
+
+Thanks to Julian Orth for reporting this regression.
+
+This patch (of 2):
+
+In commit 158978945f31 ("mm: perform the mapping_map_writable() check
+after call_mmap()") (and preceding changes in the same series) it became
+possible to mmap() F_SEAL_WRITE sealed memfd mappings read-only.
+
+This was previously unnecessarily disallowed, despite the man page
+documentation indicating that it would be permitted, thereby limiting the usefulness
+of F_SEAL_WRITE logic.
+
+We fixed this by adapting logic that existed for the F_SEAL_FUTURE_WRITE
+seal (one which disallows future writes to the memfd) to also be used for
+F_SEAL_WRITE.
+
+For background - the F_SEAL_FUTURE_WRITE seal clears VM_MAYWRITE for a
+read-only mapping to disallow mprotect() from overriding the seal - an
+operation performed by seal_check_write(), invoked from shmem_mmap(), the
+f_op->mmap() hook used by shmem mappings.
+
+By extending this to F_SEAL_WRITE and critically - checking
+mapping_map_writable() to determine if we may map the memfd AFTER we
+invoke shmem_mmap() - the desired logic becomes possible.  This is because
+mapping_map_writable() explicitly checks for VM_MAYWRITE, which we will
+have cleared.
+
+Commit 5de195060b2e ("mm: resolve faulty mmap_region() error path
+behaviour") unintentionally undid this logic by moving the
+mapping_map_writable() check before the shmem_mmap() hook is invoked,
+thereby regressing this change.
+
+We reinstate this functionality by moving the check out of shmem_mmap()
+and instead performing it in do_mmap() at the point at which VMA flags are
+being determined, which seems in any case to be a more appropriate place
+in which to make this determination.
+
+In order to achieve this we rework memfd seal logic to allow us access to
+this information using existing logic and eliminate the clearing of
+VM_MAYWRITE from seal_check_write() which we are performing in do_mmap()
+instead.
+
+Link: https://lkml.kernel.org/r/99fc35d2c62bd2e05571cf60d9f8b843c56069e0.1732804776.git.lorenzo.stoakes@oracle.com
+Fixes: 5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: Julian Orth <ju.orth@gmail.com>
+Closes: https://lore.kernel.org/all/CAHijbEUMhvJTN9Xw1GmbM266FXXv=U7s4L_Jem5x3AaPZxrYpQ@mail.gmail.com/
+Cc: Jann Horn <jannh@google.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memfd.h |   14 ++++++++++++
+ include/linux/mm.h    |   58 ++++++++++++++++++++++++++++++++++----------------
+ mm/memfd.c            |    2 -
+ mm/mmap.c             |    4 +++
+ 4 files changed, 59 insertions(+), 19 deletions(-)
+
+--- a/include/linux/memfd.h
++++ b/include/linux/memfd.h
+@@ -6,11 +6,25 @@
+ #ifdef CONFIG_MEMFD_CREATE
+ extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
++unsigned int *memfd_file_seals_ptr(struct file *file);
+ #else
+ static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
+ {
+       return -EINVAL;
+ }
++
++static inline unsigned int *memfd_file_seals_ptr(struct file *file)
++{
++      return NULL;
++}
+ #endif
++/* Retrieve memfd seals associated with the file, if any. */
++static inline unsigned int memfd_file_seals(struct file *file)
++{
++      unsigned int *sealsp = memfd_file_seals_ptr(file);
++
++      return sealsp ? *sealsp : 0;
++}
++
+ #endif /* __LINUX_MEMFD_H */
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -3286,6 +3286,37 @@ void mem_dump_obj(void *object);
+ static inline void mem_dump_obj(void *object) {}
+ #endif
++static inline bool is_write_sealed(int seals)
++{
++      return seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE);
++}
++
++/**
++ * is_readonly_sealed - Checks whether write-sealed but mapped read-only,
++ *                      in which case writes should be disallowed moving
++ *                      forwards.
++ * @seals: the seals to check
++ * @vm_flags: the VMA flags to check
++ *
++ * Returns whether readonly sealed, in which case writes should be disallowed
++ * going forward.
++ */
++static inline bool is_readonly_sealed(int seals, vm_flags_t vm_flags)
++{
++      /*
++       * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
++       * MAP_SHARED and read-only, take care to not allow mprotect to
++       * revert protections on such mappings. Do this only for shared
++       * mappings. For private mappings, don't need to mask
++       * VM_MAYWRITE as we still want them to be COW-writable.
++       */
++      if (is_write_sealed(seals) &&
++          ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_SHARED))
++              return true;
++
++      return false;
++}
++
+ /**
+  * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and
+  *                    handle them.
+@@ -3297,24 +3328,15 @@ static inline void mem_dump_obj(void *ob
+  */
+ static inline int seal_check_write(int seals, struct vm_area_struct *vma)
+ {
+-      if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+-              /*
+-               * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+-               * write seals are active.
+-               */
+-              if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+-                      return -EPERM;
+-
+-              /*
+-               * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
+-               * MAP_SHARED and read-only, take care to not allow mprotect to
+-               * revert protections on such mappings. Do this only for shared
+-               * mappings. For private mappings, don't need to mask
+-               * VM_MAYWRITE as we still want them to be COW-writable.
+-               */
+-              if (vma->vm_flags & VM_SHARED)
+-                      vma->vm_flags &= ~(VM_MAYWRITE);
+-      }
++      if (!is_write_sealed(seals))
++              return 0;
++
++      /*
++       * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
++       * write seals are active.
++       */
++      if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
++              return -EPERM;
+       return 0;
+ }
+--- a/mm/memfd.c
++++ b/mm/memfd.c
+@@ -133,7 +133,7 @@ static int memfd_wait_for_pins(struct ad
+       return error;
+ }
+-static unsigned int *memfd_file_seals_ptr(struct file *file)
++unsigned int *memfd_file_seals_ptr(struct file *file)
+ {
+       if (shmem_file(file))
+               return &SHMEM_I(file_inode(file))->seals;
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -47,6 +47,7 @@
+ #include <linux/pkeys.h>
+ #include <linux/oom.h>
+ #include <linux/sched/mm.h>
++#include <linux/memfd.h>
+ #include <linux/uaccess.h>
+ #include <asm/cacheflush.h>
+@@ -1486,6 +1487,7 @@ unsigned long do_mmap(struct file *file,
+       if (file) {
+               struct inode *inode = file_inode(file);
++              unsigned int seals = memfd_file_seals(file);
+               unsigned long flags_mask;
+               if (!file_mmap_ok(file, inode, pgoff, len))
+@@ -1524,6 +1526,8 @@ unsigned long do_mmap(struct file *file,
+                       vm_flags |= VM_SHARED | VM_MAYSHARE;
+                       if (!(file->f_mode & FMODE_WRITE))
+                               vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
++                      else if (is_readonly_sealed(seals, vm_flags))
++                              vm_flags &= ~VM_MAYWRITE;
+                       fallthrough;
+               case MAP_PRIVATE:
+                       if (!(file->f_mode & FMODE_READ))
diff --git a/queue-5.15/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch b/queue-5.15/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch
new file mode 100644 (file)
index 0000000..55b9e22
--- /dev/null
@@ -0,0 +1,104 @@
+From stable+bounces-165170-greg=kroah.com@vger.kernel.org Wed Jul 30 03:55:00 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 18:53:31 -0700
+Subject: mm: update memfd seal write check to include F_SEAL_WRITE
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Muchun Song <muchun.song@linux.dev>, Oscar Salvador <osalvador@suse.de>,  David Hildenbrand <david@redhat.com>, Andrew Morton <akpm@linux-foundation.org>,  "Liam R. Howlett" <Liam.Howlett@oracle.com>, Vlastimil Babka <vbabka@suse.cz>,  Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>, Michal Hocko <mhocko@suse.com>,  Hugh Dickins <hughd@google.com>, Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com,  Lorenzo Stoakes <lstoakes@gmail.com>, Jan Kara <jack@suse.cz>,  Alexander Viro <viro@zeniv.linux.org.uk>, Andy Lutomirski <luto@kernel.org>,  Christian Brauner <brauner@kernel.org>, "Matthew Wilcox (Oracle)" <willy@infradead.org>,  Mike Kravetz <mike.kravetz@oracle.com>, linux-mm@kvack.org, linux-kernel@vger.kernel.org
+Message-ID: <20250730015337.31730-3-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 28464bbb2ddc199433383994bcb9600c8034afa1 ]
+
+The seal_check_future_write() function is called by shmem_mmap() or
+hugetlbfs_file_mmap() to disallow any future writable mappings of a memfd
+sealed this way.
+
+The F_SEAL_WRITE flag is not checked here, as that is handled via the
+mapping->i_mmap_writable mechanism and so any attempt at a mapping would
+fail before this could be run.
+
+However we intend to change this, meaning this check can be performed for
+F_SEAL_WRITE mappings also.
+
+The logic here is equally applicable to both flags, so update this
+function to accommodate both and rename it accordingly.
+
+Link: https://lkml.kernel.org/r/913628168ce6cce77df7d13a63970bae06a526e0.1697116581.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c |    2 +-
+ include/linux/mm.h   |   15 ++++++++-------
+ mm/shmem.c           |    2 +-
+ 3 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -148,7 +148,7 @@ static int hugetlbfs_file_mmap(struct fi
+       vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+       vma->vm_ops = &hugetlb_vm_ops;
+-      ret = seal_check_future_write(info->seals, vma);
++      ret = seal_check_write(info->seals, vma);
+       if (ret)
+               return ret;
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -3287,25 +3287,26 @@ static inline void mem_dump_obj(void *ob
+ #endif
+ /**
+- * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
++ * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and
++ *                    handle them.
+  * @seals: the seals to check
+  * @vma: the vma to operate on
+  *
+- * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+- * the vma flags.  Return 0 if check pass, or <0 for errors.
++ * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper
++ * check/handling on the vma flags.  Return 0 if check pass, or <0 for errors.
+  */
+-static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
++static inline int seal_check_write(int seals, struct vm_area_struct *vma)
+ {
+-      if (seals & F_SEAL_FUTURE_WRITE) {
++      if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+               /*
+                * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+-               * "future write" seal active.
++               * write seals are active.
+                */
+               if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+                       return -EPERM;
+               /*
+-               * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
++               * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
+                * MAP_SHARED and read-only, take care to not allow mprotect to
+                * revert protections on such mappings. Do this only for shared
+                * mappings. For private mappings, don't need to mask
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2262,7 +2262,7 @@ static int shmem_mmap(struct file *file,
+       struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+       int ret;
+-      ret = seal_check_future_write(info->seals, vma);
++      ret = seal_check_write(info->seals, vma);
+       if (ret)
+               return ret;
diff --git a/queue-5.15/selftests-memfd-add-test-for-mapping-write-sealed-memfd-read-only.patch b/queue-5.15/selftests-memfd-add-test-for-mapping-write-sealed-memfd-read-only.patch
new file mode 100644 (file)
index 0000000..ac0a958
--- /dev/null
@@ -0,0 +1,98 @@
+From 3MnuJaA4KBpkBL335F3GC3KK7L9HH9E7.5HF9K79DAEBGNQ8HNG63MBHG.HK9@flex--isaacmanjarres.bounces.google.com Wed Jul 30 03:53:55 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 18:53:33 -0700
+Subject: selftests/memfd: add test for mapping write-sealed memfd read-only
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Shuah Khan <shuah@kernel.org>
+Cc: aliceryhl@google.com, surenb@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com,  Jann Horn <jannh@google.com>, Julian Orth <ju.orth@gmail.com>,  "Liam R. Howlett" <Liam.Howlett@Oracle.com>, Linus Torvalds <torvalds@linux-foundation.org>,  Vlastimil Babka <vbabka@suse.cz>, Andrew Morton <akpm@linux-foundation.org>,  linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org
+Message-ID: <20250730015337.31730-5-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+[ Upstream commit ea0916e01d0b0f2cce1369ac1494239a79827270 ]
+
+Now we have reinstated the ability to map F_SEAL_WRITE mappings read-only,
+assert that we are able to do this in a test to ensure that we do not
+regress this again.
+
+Link: https://lkml.kernel.org/r/a6377ec470b14c0539b4600cf8fa24bf2e4858ae.1732804776.git.lorenzo.stoakes@oracle.com
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Julian Orth <ju.orth@gmail.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/memfd/memfd_test.c |   43 +++++++++++++++++++++++++++++
+ 1 file changed, 43 insertions(+)
+
+--- a/tools/testing/selftests/memfd/memfd_test.c
++++ b/tools/testing/selftests/memfd/memfd_test.c
+@@ -186,6 +186,24 @@ static void *mfd_assert_mmap_shared(int
+       return p;
+ }
++static void *mfd_assert_mmap_read_shared(int fd)
++{
++      void *p;
++
++      p = mmap(NULL,
++               mfd_def_size,
++               PROT_READ,
++               MAP_SHARED,
++               fd,
++               0);
++      if (p == MAP_FAILED) {
++              printf("mmap() failed: %m\n");
++              abort();
++      }
++
++      return p;
++}
++
+ static void *mfd_assert_mmap_private(int fd)
+ {
+       void *p;
+@@ -802,6 +820,30 @@ static void test_seal_future_write(void)
+       close(fd);
+ }
++static void test_seal_write_map_read_shared(void)
++{
++      int fd;
++      void *p;
++
++      printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
++
++      fd = mfd_assert_new("kern_memfd_seal_write_map_read",
++                          mfd_def_size,
++                          MFD_CLOEXEC | MFD_ALLOW_SEALING);
++
++      mfd_assert_add_seals(fd, F_SEAL_WRITE);
++      mfd_assert_has_seals(fd, F_SEAL_WRITE);
++
++      p = mfd_assert_mmap_read_shared(fd);
++
++      mfd_assert_read(fd);
++      mfd_assert_read_shared(fd);
++      mfd_fail_write(fd);
++
++      munmap(p, mfd_def_size);
++      close(fd);
++}
++
+ /*
+  * Test SEAL_SHRINK
+  * Test whether SEAL_SHRINK actually prevents shrinking
+@@ -1056,6 +1098,7 @@ int main(int argc, char **argv)
+       test_seal_write();
+       test_seal_future_write();
++      test_seal_write_map_read_shared();
+       test_seal_shrink();
+       test_seal_grow();
+       test_seal_resize();
index c4a683fac0e92d5a385a3ac5ef203c6799c22daf..1aef91c7613ab5eccc2b91a043840842416c398a 100644 (file)
@@ -503,3 +503,7 @@ mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch
 sch_htb-make-htb_qlen_notify-idempotent.patch
 sch_hfsc-make-hfsc_qlen_notify-idempotent.patch
 sch_qfq-make-qfq_qlen_notify-idempotent.patch
+mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch
+mm-update-memfd-seal-write-check-to-include-f_seal_write.patch
+mm-reinstate-ability-to-map-write-sealed-memfd-mappings-read-only.patch
+selftests-memfd-add-test-for-mapping-write-sealed-memfd-read-only.patch