git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Nov 2025 10:15:25 +0000 (11:15 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Nov 2025 10:15:25 +0000 (11:15 +0100)
added patches:
filemap-cap-pte-range-to-be-created-to-allowed-zero-fill-in-folio_map_range.patch
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
mm-secretmem-fix-use-after-free-race-in-fault-handler.patch
mm-truncate-unmap-large-folio-on-split-failure.patch

queue-6.6/filemap-cap-pte-range-to-be-created-to-allowed-zero-fill-in-folio_map_range.patch [new file with mode: 0644]
queue-6.6/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch [new file with mode: 0644]
queue-6.6/mm-secretmem-fix-use-after-free-race-in-fault-handler.patch [new file with mode: 0644]
queue-6.6/mm-truncate-unmap-large-folio-on-split-failure.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/filemap-cap-pte-range-to-be-created-to-allowed-zero-fill-in-folio_map_range.patch b/queue-6.6/filemap-cap-pte-range-to-be-created-to-allowed-zero-fill-in-folio_map_range.patch
new file mode 100644 (file)
index 0000000..fb3737f
--- /dev/null
@@ -0,0 +1,71 @@
+From 743a2753a02e805347969f6f89f38b736850d808 Mon Sep 17 00:00:00 2001
+From: Pankaj Raghav <p.raghav@samsung.com>
+Date: Thu, 22 Aug 2024 15:50:13 +0200
+Subject: filemap: cap PTE range to be created to allowed zero fill in folio_map_range()
+
+From: Pankaj Raghav <p.raghav@samsung.com>
+
+commit 743a2753a02e805347969f6f89f38b736850d808 upstream.
+
+Usually, the page cache does not extend beyond the size of the inode;
+therefore, no PTEs are created for folios that extend beyond that size.
+
+But with LBS support, we might extend the page cache beyond the size of
+the inode, as we need to guarantee folios of a minimum order. While
+doing a read, do_fault_around() can create PTEs for pages that lie
+beyond EOF, leading to an incorrect error return when accessing a page
+beyond the mapped file.
+
+Cap the PTE range to be created for the page cache to the end of file
+(EOF) in filemap_map_pages() so that the returned error codes are
+consistent with POSIX[1] for LBS configurations.
+
+generic/749 has been created to trigger this edge case. This also fixes
+generic/749 for tmpfs with huge=always on systems with 4k base page size.
+
+[1](from mmap(2))  SIGBUS
+    Attempted access to a page of the buffer that lies beyond the end
+    of the mapped file.  For an explanation of the treatment  of  the
+    bytes  in  the  page that corresponds to the end of a mapped file
+    that is not a multiple of the page size, see NOTES.
+
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
+Link: https://lore.kernel.org/r/20240822135018.1931258-6-kernel@pankajraghav.com
+Tested-by: David Howells <dhowells@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3608,7 +3608,7 @@ vm_fault_t filemap_map_pages(struct vm_f
+       struct vm_area_struct *vma = vmf->vma;
+       struct file *file = vma->vm_file;
+       struct address_space *mapping = file->f_mapping;
+-      pgoff_t last_pgoff = start_pgoff;
++      pgoff_t file_end, last_pgoff = start_pgoff;
+       unsigned long addr;
+       XA_STATE(xas, &mapping->i_pages, start_pgoff);
+       struct folio *folio;
+@@ -3632,6 +3632,11 @@ vm_fault_t filemap_map_pages(struct vm_f
+               folio_put(folio);
+               goto out;
+       }
++
++      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
++      if (end_pgoff > file_end)
++              end_pgoff = file_end;
++
+       do {
+               unsigned long end;
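
The POSIX rule quoted in the changelog above can be exercised from userspace with a small test along these lines (a minimal sketch, not part of the patch; it assumes a writable current directory and uses an arbitrary "testfile" name):

/*
 * Minimal sketch: touching a whole page beyond EOF must raise SIGBUS.
 * "testfile" is an arbitrary name in the current directory.
 */
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf env;

static void on_sigbus(int sig)
{
        (void)sig;
        siglongjmp(env, 1);
}

int main(void)
{
        long psize = sysconf(_SC_PAGESIZE);
        int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char *p;

        if (fd < 0 || ftruncate(fd, psize + 100) < 0)   /* EOF inside page 1 */
                return 1;

        p = mmap(NULL, 3 * psize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;

        signal(SIGBUS, on_sigbus);

        p[psize + 200] = 1;     /* same page as EOF: zero-filled, no signal */

        if (sigsetjmp(env, 1) == 0) {
                p[2 * psize] = 1;       /* a whole page beyond EOF */
                puts("no SIGBUS - the behavior the fix prevents");
        } else {
                puts("SIGBUS, as POSIX requires");
        }
        return 0;
}

Without the cap in filemap_map_pages(), an LBS or huge-folio configuration could populate the PTE for the page at 2 * page size during fault-around, and the second access would succeed instead of raising SIGBUS.
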
diff --git a/queue-6.6/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch b/queue-6.6/mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
new file mode 100644 (file)
index 0000000..e1d4b1f
--- /dev/null
@@ -0,0 +1,200 @@
+From 74207de2ba10c2973334906822dc94d2e859ffc5 Mon Sep 17 00:00:00 2001
+From: Kiryl Shutsemau <kas@kernel.org>
+Date: Mon, 27 Oct 2025 11:56:35 +0000
+Subject: mm/memory: do not populate page table entries beyond i_size
+
+From: Kiryl Shutsemau <kas@kernel.org>
+
+commit 74207de2ba10c2973334906822dc94d2e859ffc5 upstream.
+
+Patch series "Fix SIGBUS semantics with large folios", v3.
+
+Accessing memory within a VMA, but beyond i_size rounded up to the next
+page size, is supposed to generate SIGBUS.
+
+Darrick reported[1] an xfstests regression in v6.18-rc1.  generic/749
+failed due to missing SIGBUS.  This was caused by my recent changes that
+try to fault in the whole folio where possible:
+
+        19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
+        357b92761d94 ("mm/filemap: map entire large folio faultaround")
+
+These changes did not consider i_size when setting up PTEs, leading to
+xfstest breakage.
+
+However, the problem has been present in the kernel for a long time -
+since huge tmpfs was introduced in 2016.  The kernel happily maps
+PMD-sized folios as PMDs without checking i_size.  And huge=always tmpfs
+allocates PMD-sized folios on any write.
+
+I considered this corner case when I implemented a large tmpfs, and my
+conclusion was that no one in their right mind should rely on receiving a
+SIGBUS signal when accessing beyond i_size.  I cannot imagine how it could
+be useful for the workload.
+
+But apparently filesystem folks care a lot about preserving strict SIGBUS
+semantics.
+
+generic/749 was introduced last year with reference to POSIX, but no real
+workloads were mentioned.  The test also acknowledges that tmpfs deviates
+from this behavior.
+
+POSIX indeed says[3]:
+
+        References within the address range starting at pa and
+        continuing for len bytes to whole pages following the end of an
+        object shall result in delivery of a SIGBUS signal.
+
+The patchset fixes the regression introduced by recent changes as well as
+more subtle SIGBUS breakage due to split failure on truncation.
+
+
+This patch (of 2):
+
+Accesses within a VMA, but beyond i_size rounded up to PAGE_SIZE, are
+supposed to generate SIGBUS.
+
+Recent changes attempted to fault in the full folio where possible.  They
+did not respect i_size, which led to populating PTEs beyond i_size and
+breaking SIGBUS semantics.
+
+Darrick reported generic/749 breakage because of this.
+
+However, the problem existed before the recent changes.  With huge=always
+tmpfs, any write to a file leads to a PMD-sized allocation.  A subsequent
+fault-in of the folio will install a PMD mapping regardless of i_size.
+
+Fix filemap_map_pages() and finish_fault() to not install:
+  - PTEs beyond i_size;
+  - PMD mappings across i_size;
+
+Make an exception for shmem/tmpfs, which has long been intentionally
+mapped with PMDs across i_size.
+
+Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
+Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Fixes: 6795801366da ("xfs: Support large folios")
+Reported-by: "Darrick J. Wong" <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |   20 +++++++++++++++-----
+ mm/memory.c  |   24 +++++++++++++++++++++++-
+ 2 files changed, 38 insertions(+), 6 deletions(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3614,13 +3614,27 @@ vm_fault_t filemap_map_pages(struct vm_f
+       struct folio *folio;
+       vm_fault_t ret = 0;
+       unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved;
++      bool can_map_large;
+       rcu_read_lock();
+       folio = next_uptodate_folio(&xas, mapping, end_pgoff);
+       if (!folio)
+               goto out;
+-      if (filemap_map_pmd(vmf, folio, start_pgoff)) {
++      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
++      end_pgoff = min(end_pgoff, file_end);
++
++      /*
++       * Do not allow to map with PTEs beyond i_size and with PMD
++       * across i_size to preserve SIGBUS semantics.
++       *
++       * Make an exception for shmem/tmpfs that for long time
++       * intentionally mapped with PMDs across i_size.
++       */
++      can_map_large = shmem_mapping(mapping) ||
++              file_end >= folio_next_index(folio);
++
++      if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
+               ret = VM_FAULT_NOPAGE;
+               goto out;
+       }
+@@ -3633,10 +3647,6 @@ vm_fault_t filemap_map_pages(struct vm_f
+               goto out;
+       }
+-      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+-      if (end_pgoff > file_end)
+-              end_pgoff = file_end;
+-
+       do {
+               unsigned long end;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -67,6 +67,7 @@
+ #include <linux/gfp.h>
+ #include <linux/migrate.h>
+ #include <linux/string.h>
++#include <linux/shmem_fs.h>
+ #include <linux/memory-tiers.h>
+ #include <linux/debugfs.h>
+ #include <linux/userfaultfd_k.h>
+@@ -4435,6 +4436,8 @@ static bool vmf_pte_changed(struct vm_fa
+ vm_fault_t finish_fault(struct vm_fault *vmf)
+ {
+       struct vm_area_struct *vma = vmf->vma;
++      bool needs_fallback = false;
++      struct folio *folio;
+       struct page *page;
+       vm_fault_t ret;
+@@ -4444,6 +4447,8 @@ vm_fault_t finish_fault(struct vm_fault
+       else
+               page = vmf->page;
++      folio = page_folio(page);
++
+       /*
+        * check even for read faults because we might have lost our CoWed
+        * page
+@@ -4454,8 +4459,25 @@ vm_fault_t finish_fault(struct vm_fault
+                       return ret;
+       }
++      if (!needs_fallback && vma->vm_file) {
++              struct address_space *mapping = vma->vm_file->f_mapping;
++              pgoff_t file_end;
++
++              file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
++
++              /*
++               * Do not allow to map with PTEs beyond i_size and with PMD
++               * across i_size to preserve SIGBUS semantics.
++               *
++               * Make an exception for shmem/tmpfs that for long time
++               * intentionally mapped with PMDs across i_size.
++               */
++              needs_fallback = !shmem_mapping(mapping) &&
++                      file_end < folio_next_index(folio);
++      }
++
+       if (pmd_none(*vmf->pmd)) {
+-              if (PageTransCompound(page)) {
++              if (!needs_fallback && PageTransCompound(page)) {
+                       ret = do_set_pmd(vmf, page);
+                       if (ret != VM_FAULT_FALLBACK)
+                               return ret;
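
To make the index arithmetic of the two checks concrete, the same computation can be reproduced standalone (a sketch with hypothetical values for a non-shmem mapping; DIV_ROUND_UP is redefined locally and a 4 KiB page size is assumed):

/* Standalone sketch of the i_size checks, with hypothetical values.
 * The shmem_mapping() exception is not modeled here. */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned long i_size = 1024UL * 1024;   /* 1 MiB file: data in pages 0..255  */
        unsigned long folio_index = 0;          /* PMD-sized folio at offset 0 ...   */
        unsigned long folio_nr_pages = 512;     /* ... covering pages 0..511 (2 MiB) */
        unsigned long folio_next_index = folio_index + folio_nr_pages;

        /* finish_fault(): refuse the PMD if the folio crosses i_size */
        unsigned long file_end = DIV_ROUND_UP(i_size, PAGE_SIZE);
        int needs_fallback = file_end < folio_next_index;

        /* filemap_map_pages(): last page index that may still get a PTE */
        unsigned long last_pgoff = DIV_ROUND_UP(i_size, PAGE_SIZE) - 1;

        printf("needs_fallback=%d (folio crosses i_size, map with PTEs)\n",
               needs_fallback);
        printf("fault-around capped at page index %lu\n", last_pgoff);
        return 0;
}

With these values the PMD mapping is refused and fault-around stops at page index 255, so no PTE is installed at or beyond the 1 MiB i_size.
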
diff --git a/queue-6.6/mm-secretmem-fix-use-after-free-race-in-fault-handler.patch b/queue-6.6/mm-secretmem-fix-use-after-free-race-in-fault-handler.patch
new file mode 100644 (file)
index 0000000..09ab0ed
--- /dev/null
@@ -0,0 +1,62 @@
+From 6f86d0534fddfbd08687fa0f01479d4226bc3c3d Mon Sep 17 00:00:00 2001
+From: Lance Yang <lance.yang@linux.dev>
+Date: Fri, 31 Oct 2025 20:09:55 +0800
+Subject: mm/secretmem: fix use-after-free race in fault handler
+
+From: Lance Yang <lance.yang@linux.dev>
+
+commit 6f86d0534fddfbd08687fa0f01479d4226bc3c3d upstream.
+
+When a page fault occurs in a secret memory file created with
+`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
+underlying page as not-present in the direct map, and add it to the file
+mapping.
+
+If two tasks cause a fault in the same page concurrently, both could end
+up allocating a folio and removing the page from the direct map, but only
+one would succeed in adding the folio to the file mapping.  The task that
+failed undoes the effects of its attempt by (a) freeing the folio again
+and (b) putting the page back into the direct map.  However, by doing
+these two operations in this order, the page becomes available to the
+allocator again before it is placed back in the direct mapping.
+
+If another task attempts to allocate the page between (a) and (b), and the
+kernel tries to access it via the direct map, it would result in a
+supervisor not-present page fault.
+
+Fix the ordering to restore the direct map before the folio is freed.
+
+Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev
+Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
+Signed-off-by: Lance Yang <lance.yang@linux.dev>
+Reported-by: Google Big Sleep <big-sleep-vuln-reports@google.com>
+Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWKrdQJ-ATdg@mail.gmail.com/
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/secretmem.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -84,13 +84,13 @@ retry:
+               __folio_mark_uptodate(folio);
+               err = filemap_add_folio(mapping, folio, offset, gfp);
+               if (unlikely(err)) {
+-                      folio_put(folio);
+                       /*
+                        * If a split of large page was required, it
+                        * already happened when we marked the page invalid
+                        * which guarantees that this call won't fail
+                        */
+                       set_direct_map_default_noflush(page);
++                      folio_put(folio);
+                       if (err == -EEXIST)
+                               goto retry;
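
The fault path being fixed can be driven from userspace by having two threads touch the same memfd_secret page at once (a minimal sketch; it assumes a kernel and libc headers with memfd_secret support and sufficient RLIMIT_MEMLOCK, and it only exercises the path rather than reproducing the race deterministically):

/* Minimal sketch: two threads fault the same memfd_secret page concurrently. */
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static volatile char *map;

static void *toucher(void *arg)
{
        map[0] = 1;             /* both threads fault page 0 of the mapping */
        return arg;
}

int main(void)
{
        pthread_t t1, t2;
        int fd = syscall(SYS_memfd_secret, 0);

        if (fd < 0 || ftruncate(fd, 4096) < 0) {
                perror("memfd_secret/ftruncate");
                return 1;
        }
        map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        pthread_create(&t1, NULL, toucher, NULL);
        pthread_create(&t2, NULL, toucher, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        puts("both faults completed");
        return 0;
}

Build with -pthread; whichever thread loses the filemap_add_folio() race takes the error path whose ordering the patch corrects.
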
diff --git a/queue-6.6/mm-truncate-unmap-large-folio-on-split-failure.patch b/queue-6.6/mm-truncate-unmap-large-folio-on-split-failure.patch
new file mode 100644 (file)
index 0000000..476515c
--- /dev/null
@@ -0,0 +1,99 @@
+From fa04f5b60fda62c98a53a60de3a1e763f11feb41 Mon Sep 17 00:00:00 2001
+From: Kiryl Shutsemau <kas@kernel.org>
+Date: Mon, 27 Oct 2025 11:56:36 +0000
+Subject: mm/truncate: unmap large folio on split failure
+
+From: Kiryl Shutsemau <kas@kernel.org>
+
+commit fa04f5b60fda62c98a53a60de3a1e763f11feb41 upstream.
+
+Accesses within a VMA, but beyond i_size rounded up to PAGE_SIZE, are
+supposed to generate SIGBUS.
+
+This behavior might not be respected on truncation.
+
+During truncation, the kernel splits a large folio in order to reclaim
+memory.  As a side effect, it unmaps the folio and destroys its PMD
+mappings.  The folio will be refaulted with PTEs, and SIGBUS semantics
+are preserved.
+
+However, if the split fails, PMD mappings are preserved and the user will
+not receive SIGBUS on any accesses within the PMD.
+
+Unmap the folio on split failure.  This will lead to the folio being
+refaulted with PTEs, preserving SIGBUS semantics.
+
+Make an exception for shmem/tmpfs, which has long been intentionally
+mapped with PMDs across i_size.
+
+Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
+Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: "Darrick J. Wong" <djwong@kernel.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/truncate.c |   27 ++++++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -196,6 +196,31 @@ int truncate_inode_folio(struct address_
+       return 0;
+ }
++static int try_folio_split_or_unmap(struct folio *folio)
++{
++      enum ttu_flags ttu_flags =
++              TTU_SYNC |
++              TTU_SPLIT_HUGE_PMD |
++              TTU_IGNORE_MLOCK;
++      int ret;
++
++      ret = split_folio(folio);
++
++      /*
++       * If the split fails, unmap the folio, so it will be refaulted
++       * with PTEs to respect SIGBUS semantics.
++       *
++       * Make an exception for shmem/tmpfs that for long time
++       * intentionally mapped with PMDs across i_size.
++       */
++      if (ret && !shmem_mapping(folio->mapping)) {
++              try_to_unmap(folio, ttu_flags);
++              WARN_ON(folio_mapped(folio));
++      }
++
++      return ret;
++}
++
+ /*
+  * Handle partial folios.  The folio may be entirely within the
+  * range if a split has raced with us.  If not, we zero the part of the
+@@ -239,7 +264,7 @@ bool truncate_inode_partial_folio(struct
+               folio_invalidate(folio, offset, length);
+       if (!folio_test_large(folio))
+               return true;
+-      if (split_folio(folio) == 0)
++      if (try_folio_split_or_unmap(folio) == 0)
+               return true;
+       if (folio_test_dirty(folio))
+               return false;
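
The restored truncation semantics can be checked from userspace by shrinking a file underneath an established mapping and then touching a page that now lies wholly beyond the new i_size (a minimal sketch with arbitrary sizes and file name; shmem/tmpfs keeps its historical PMD behavior by design):

/*
 * Minimal sketch: truncate under a mapping, then touch a page wholly
 * beyond the new i_size and expect SIGBUS.  Sizes and the "testfile"
 * name are arbitrary; large folios are only involved on filesystems
 * that actually use them.
 */
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf env;

static void on_sigbus(int sig)
{
        (void)sig;
        siglongjmp(env, 1);
}

int main(void)
{
        long psize = sysconf(_SC_PAGESIZE);
        size_t len = 1024 * psize;              /* 4 MiB with 4 KiB pages */
        int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char *p;

        if (fd < 0 || ftruncate(fd, len) < 0)
                return 1;
        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;

        memset(p, 0xaa, len);                   /* populate the mapping */
        if (ftruncate(fd, len / 4) < 0)         /* shrink to 1 MiB under the mapping */
                return 1;

        signal(SIGBUS, on_sigbus);
        if (sigsetjmp(env, 1) == 0) {
                p[len / 2] = 1;                 /* wholly beyond the new i_size */
                puts("no SIGBUS - the pre-fix behavior when a folio split fails");
        } else {
                puts("SIGBUS, as expected");
        }
        return 0;
}
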
diff --git a/queue-6.6/series b/queue-6.6/series
index 0bc28ad41ba653dfcf448dd51681408eda9f2e5e..17ab305f5f9f175a573c1a287306bc683217143c 100644 (file)
--- a/queue-6.6/series
@@ -515,3 +515,7 @@ mm-memcg-move-vmstats-structs-definition-above-flushing-code.patch
 mm-memcg-make-stats-flushing-threshold-per-memcg.patch
 mm-workingset-move-the-stats-flush-into-workingset_test_recent.patch
 mm-memcg-restore-subtree-stats-flushing.patch
+filemap-cap-pte-range-to-be-created-to-allowed-zero-fill-in-folio_map_range.patch
+mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
+mm-truncate-unmap-large-folio-on-split-failure.patch
+mm-secretmem-fix-use-after-free-race-in-fault-handler.patch