git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for all trees
author    Sasha Levin <sashal@kernel.org>
          Thu, 27 Nov 2025 14:22:38 +0000 (09:22 -0500)
committer Sasha Levin <sashal@kernel.org>
          Thu, 27 Nov 2025 14:23:28 +0000 (09:23 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.1/filemap-cap-pte-range-to-be-created-to-allowed-zero-.patch [new file with mode: 0644]
queue-6.1/mm-memory-do-not-populate-page-table-entries-beyond-.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/filemap-cap-pte-range-to-be-created-to-allowed-zero-.patch b/queue-6.1/filemap-cap-pte-range-to-be-created-to-allowed-zero-.patch
new file mode 100644
index 0000000..9a0563f
--- /dev/null
@@ -0,0 +1,77 @@
+From 9fa2ee29c40e2b592a95ba7bf431ee2b31e2e603 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Nov 2025 13:50:56 +0000
+Subject: filemap: cap PTE range to be created to allowed zero fill in
+ folio_map_range()
+
+From: Pankaj Raghav <p.raghav@samsung.com>
+
+[ Upstream commit 743a2753a02e805347969f6f89f38b736850d808 ]
+
+Usually the page cache does not extend beyond the size of the inode;
+therefore, no PTEs are created for folios that extend beyond that size.
+
+But with LBS support, we might extend the page cache beyond the size of
+the inode, as we need to guarantee folios of a minimum order. While doing
+a read, do_fault_around() can create PTEs for pages that lie beyond EOF,
+leading to an incorrect error return when accessing a page beyond the
+mapped file.
+
+Cap the PTE range to be created for the page cache up to the end of
+file (EOF) in filemap_map_pages() so that the returned error codes are
+consistent with POSIX[1] for LBS configurations.
+
+generic/749 has been created to trigger this edge case. This also fixes
+generic/749 for tmpfs with huge=always on systems with a 4k base page size.
+
+[1](from mmap(2))  SIGBUS
+    Attempted access to a page of the buffer that lies beyond the end
+    of the mapped file.  For an explanation of the treatment  of  the
+    bytes  in  the  page that corresponds to the end of a mapped file
+    that is not a multiple of the page size, see NOTES.
+
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
+Link: https://lore.kernel.org/r/20240822135018.1931258-6-kernel@pankajraghav.com
+Tested-by: David Howells <dhowells@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Daniel Gomez <da.gomez@samsung.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/filemap.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/mm/filemap.c b/mm/filemap.c
+index b77f534dfad35..40c186c7210bf 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3460,7 +3460,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+       struct vm_area_struct *vma = vmf->vma;
+       struct file *file = vma->vm_file;
+       struct address_space *mapping = file->f_mapping;
+-      pgoff_t last_pgoff = start_pgoff;
++      pgoff_t file_end, last_pgoff = start_pgoff;
+       unsigned long addr;
+       XA_STATE(xas, &mapping->i_pages, start_pgoff);
+       struct folio *folio;
+@@ -3480,6 +3480,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+ 
+       addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
++
++      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
++      if (end_pgoff > file_end)
++              end_pgoff = file_end;
++
+       do {
+ again:
+               page = folio_file_page(folio, xas.xa_index);
+-- 
+2.51.0
+
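[ Editorial note: the block below is an illustrative userspace sketch, not
  part of the queued patch. It demonstrates the mmap(2)/POSIX behavior that
  generic/749 exercises and that the patch above restores for LBS
  configurations: reading a whole page of a mapping beyond EOF must raise
  SIGBUS. The file name and sizes are arbitrary. ]

    /* Map two pages of a one-page file; the second page lies entirely
     * beyond EOF, so touching it must deliver SIGBUS. */
    #include <fcntl.h>
    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static sigjmp_buf env;

    static void on_sigbus(int sig)
    {
            (void)sig;
            siglongjmp(env, 1);
    }

    int main(void)
    {
            long psize = sysconf(_SC_PAGESIZE);
            int fd = open("sigbus-demo", O_CREAT | O_RDWR | O_TRUNC, 0600);

            if (fd < 0 || ftruncate(fd, psize) < 0) /* file spans one page */
                    return 1;

            char *p = mmap(NULL, 2 * psize, PROT_READ, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED)
                    return 1;

            signal(SIGBUS, on_sigbus);
            if (sigsetjmp(env, 1) == 0)
                    printf("read %d past EOF, no SIGBUS\n", p[psize]);
            else
                    printf("got SIGBUS, as POSIX requires\n");
            return 0;
    }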
diff --git a/queue-6.1/mm-memory-do-not-populate-page-table-entries-beyond-.patch b/queue-6.1/mm-memory-do-not-populate-page-table-entries-beyond-.patch
new file mode 100644
index 0000000..89bbbfb
--- /dev/null
@@ -0,0 +1,206 @@
+From 88bcba15d6acd2a7c362d3b593d098a2d3be9dcc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Nov 2025 13:50:57 +0000
+Subject: mm/memory: do not populate page table entries beyond i_size
+
+From: Kiryl Shutsemau <kas@kernel.org>
+
+[ Upstream commit 74207de2ba10c2973334906822dc94d2e859ffc5 ]
+
+Patch series "Fix SIGBUS semantics with large folios", v3.
+
+Accessing memory within a VMA, but beyond i_size rounded up to the next
+page boundary, is supposed to generate SIGBUS.
+
+Darrick reported[1] an xfstests regression in v6.18-rc1.  generic/749
+failed due to missing SIGBUS.  This was caused by my recent changes that
+try to fault in the whole folio where possible:
+
+        19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
+        357b92761d94 ("mm/filemap: map entire large folio faultaround")
+
+These changes did not consider i_size when setting up PTEs, leading to
+xfstest breakage.
+
+However, the problem has been present in the kernel for a long time -
+since huge tmpfs was introduced in 2016.  The kernel happily maps
+PMD-sized folios as PMD without checking i_size.  And huge=always tmpfs
+allocates PMD-size folios on any writes.
+
+I considered this corner case when I implemented a large tmpfs, and my
+conclusion was that no one in their right mind should rely on receiving a
+SIGBUS signal when accessing beyond i_size.  I cannot imagine how it could
+be useful for the workload.
+
+But apparently filesystem folks care a lot about preserving strict SIGBUS
+semantics.
+
+Generic/749 was introduced last year with reference to POSIX, but no real
+workloads were mentioned. It also acknowledged that tmpfs deviates from
+the tested behavior.
+
+POSIX indeed says[3]:
+
+        References within the address range starting at pa and
+        continuing for len bytes to whole pages following the end of an
+        object shall result in delivery of a SIGBUS signal.
+
+The patchset fixes the regression introduced by recent changes as well as
+more subtle SIGBUS breakage due to split failure on truncation.
+
+This patch (of 2):
+
+Accesses within a VMA, but beyond i_size rounded up to PAGE_SIZE, are
+supposed to generate SIGBUS.
+
+Recent changes attempted to fault in the full folio where possible. They
+did not respect i_size, which led to populating PTEs beyond i_size and
+breaking SIGBUS semantics.
+
+Darrick reported generic/749 breakage because of this.
+
+However, the problem existed before the recent changes. With huge=always
+tmpfs, any write to a file leads to a PMD-sized allocation. The subsequent
+fault-in of the folio will install a PMD mapping regardless of i_size.
+
+Fix filemap_map_pages() and finish_fault() to not install:
+  - PTEs beyond i_size;
+  - PMD mappings across i_size;
+
+Make an exception for shmem/tmpfs, which has long been intentionally
+mapped with PMDs across i_size.
+
+Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
+Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Fixes: 6795801366da ("xfs: Support large folios")
+Reported-by: "Darrick J. Wong" <djwong@kernel.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Liam Howlett <liam.howlett@oracle.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/filemap.c | 20 +++++++++++++++-----
+ mm/memory.c  | 24 +++++++++++++++++++++++-
+ 2 files changed, 38 insertions(+), 6 deletions(-)
+
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 40c186c7210bf..4088d1a5360fa 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3467,13 +3467,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+       struct page *page;
+       unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
+       vm_fault_t ret = 0;
++      bool can_map_large;
+ 
+       rcu_read_lock();
+       folio = first_map_page(mapping, &xas, end_pgoff);
+       if (!folio)
+               goto out;
+ 
+-      if (filemap_map_pmd(vmf, &folio->page)) {
++      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
++      end_pgoff = min(end_pgoff, file_end);
++
++      /*
++       * Do not allow to map with PTEs beyond i_size and with PMD
++       * across i_size to preserve SIGBUS semantics.
++       *
++       * Make an exception for shmem/tmpfs that for long time
++       * intentionally mapped with PMDs across i_size.
++       */
++      can_map_large = shmem_mapping(mapping) ||
++              file_end >= folio_next_index(folio);
++
++      if (can_map_large && filemap_map_pmd(vmf, &folio->page)) {
+               ret = VM_FAULT_NOPAGE;
+               goto out;
+       }
+@@ -3481,10 +3495,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+       addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
+ 
+-      file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+-      if (end_pgoff > file_end)
+-              end_pgoff = file_end;
+-
+       do {
+ again:
+               page = folio_file_page(folio, xas.xa_index);
+diff --git a/mm/memory.c b/mm/memory.c
+index 454d918449b3a..f0b506acfcc5d 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -67,6 +67,7 @@
+ #include <linux/gfp.h>
+ #include <linux/migrate.h>
+ #include <linux/string.h>
++#include <linux/shmem_fs.h>
+ #include <linux/memory-tiers.h>
+ #include <linux/debugfs.h>
+ #include <linux/userfaultfd_k.h>
+@@ -4452,6 +4453,8 @@ static bool vmf_pte_changed(struct vm_fault *vmf)
+ vm_fault_t finish_fault(struct vm_fault *vmf)
+ {
+       struct vm_area_struct *vma = vmf->vma;
++      bool needs_fallback = false;
++      struct folio *folio;
+       struct page *page;
+       vm_fault_t ret;
+ 
+@@ -4461,6 +4464,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
+       else
+               page = vmf->page;
+ 
++      folio = page_folio(page);
++
+       /*
+        * check even for read faults because we might have lost our CoWed
+        * page
+@@ -4471,8 +4476,25 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
+                       return ret;
+       }
+ 
++      if (!needs_fallback && vma->vm_file) {
++              struct address_space *mapping = vma->vm_file->f_mapping;
++              pgoff_t file_end;
++
++              file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
++
++              /*
++               * Do not allow to map with PTEs beyond i_size and with PMD
++               * across i_size to preserve SIGBUS semantics.
++               *
++               * Make an exception for shmem/tmpfs that for long time
++               * intentionally mapped with PMDs across i_size.
++               */
++              needs_fallback = !shmem_mapping(mapping) &&
++                      file_end < folio_next_index(folio);
++      }
++
+       if (pmd_none(*vmf->pmd)) {
+-              if (PageTransCompound(page)) {
++              if (!needs_fallback && PageTransCompound(page)) {
+                       ret = do_set_pmd(vmf, page);
+                       if (ret != VM_FAULT_FALLBACK)
+                               return ret;
+-- 
+2.51.0
+
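[ Editorial note: an illustrative worked example of the checks added above,
  not part of the queued patch; the sizes are hypothetical. With 4k pages,
  a 5k file gives DIV_ROUND_UP(i_size, PAGE_SIZE) = 2, so only page indices
  0 and 1 hold file data. A 16k folio covering pages 0-3 has
  folio_next_index() == 4; since 2 < 4 and the mapping is not shmem,
  finish_fault() falls back from a PMD mapping to PTEs, and
  filemap_map_pages() caps end_pgoff at the last valid index, 1. ]

    /* Standalone sketch of the predicates with the hypothetical numbers
     * above; mirrors the kernel logic outside the kernel. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned long i_size = 5 * 1024;    /* hypothetical file size */
            unsigned long folio_next_index = 4; /* 16k folio: pages 0..3 */
            unsigned long file_end = DIV_ROUND_UP(i_size, PAGE_SIZE);

            /* finish_fault(): no PMD mapping across i_size (non-shmem) */
            int needs_fallback = file_end < folio_next_index;

            /* filemap_map_pages(): last page index allowed to get a PTE */
            unsigned long last_pgoff = file_end - 1;

            printf("file_end=%lu needs_fallback=%d last pgoff=%lu\n",
                   file_end, needs_fallback, last_pgoff);
            return 0;
    }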
diff --git a/queue-6.1/series b/queue-6.1/series
index 1a4a1c212d1b1293ef92d776a58e0a3d05d4ba2b..4a4f329f5914a55a4eff11b017c8f3718c9a7f9b 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -492,3 +492,5 @@ selftests-mptcp-connect-fix-fallback-note-due-to-ooo.patch
 pmdomain-samsung-plug-potential-memleak-during-probe.patch
 pmdomain-arm-scmi-fix-genpd-leak-on-provider-registration-failure.patch
 pmdomain-imx-fix-reference-count-leak-in-imx_gpc_remove.patch
+filemap-cap-pte-range-to-be-created-to-allowed-zero-.patch
+mm-memory-do-not-populate-page-table-entries-beyond-.patch