6.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Jul 2024 10:15:13 +0000 (12:15 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Jul 2024 10:15:13 +0000 (12:15 +0200)
added patches:
cachestat-do-not-flush-stats-in-recency-check.patch
filemap-replace-pte_offset_map-with-pte_offset_map_nolock.patch
ksmbd-discard-write-access-to-the-directory-open.patch
mm-filemap-make-max_pagecache_order-acceptable-to-xarray.patch
mm-filemap-skip-to-create-pmd-sized-page-cache-if-needed.patch
mm-fix-crashes-from-deferred-split-racing-folio-migration.patch
mm-vmalloc-check-if-a-hash-index-is-in-cpu_possible_mask.patch
nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch
s390-mm-add-null-pointer-check-to-crst_table_free-base_crst_free.patch
scsi-sd-do-not-repeat-the-starting-disk-message.patch

queue-6.9/cachestat-do-not-flush-stats-in-recency-check.patch [new file with mode: 0644]
queue-6.9/filemap-replace-pte_offset_map-with-pte_offset_map_nolock.patch [new file with mode: 0644]
queue-6.9/ksmbd-discard-write-access-to-the-directory-open.patch [new file with mode: 0644]
queue-6.9/mm-filemap-make-max_pagecache_order-acceptable-to-xarray.patch [new file with mode: 0644]
queue-6.9/mm-filemap-skip-to-create-pmd-sized-page-cache-if-needed.patch [new file with mode: 0644]
queue-6.9/mm-fix-crashes-from-deferred-split-racing-folio-migration.patch [new file with mode: 0644]
queue-6.9/mm-vmalloc-check-if-a-hash-index-is-in-cpu_possible_mask.patch [new file with mode: 0644]
queue-6.9/nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch [new file with mode: 0644]
queue-6.9/s390-mm-add-null-pointer-check-to-crst_table_free-base_crst_free.patch [new file with mode: 0644]
queue-6.9/scsi-sd-do-not-repeat-the-starting-disk-message.patch [new file with mode: 0644]
queue-6.9/series

diff --git a/queue-6.9/cachestat-do-not-flush-stats-in-recency-check.patch b/queue-6.9/cachestat-do-not-flush-stats-in-recency-check.patch
new file mode 100644 (file)
index 0000000..f4ce1a3
--- /dev/null
@@ -0,0 +1,119 @@
+From 5a4d8944d6b1e1aaaa83ea42c116b520b4ed0394 Mon Sep 17 00:00:00 2001
+From: Nhat Pham <nphamcs@gmail.com>
+Date: Thu, 27 Jun 2024 13:17:37 -0700
+Subject: cachestat: do not flush stats in recency check
+
+From: Nhat Pham <nphamcs@gmail.com>
+
+commit 5a4d8944d6b1e1aaaa83ea42c116b520b4ed0394 upstream.
+
+syzbot detects that cachestat() is flushing stats, which can sleep, in its
+RCU read section (see [1]).  This is done in the workingset_test_recent()
+step (which checks if the folio's eviction is recent).
+
+Move the stat flushing step to before the RCU read section of cachestat,
+and skip stat flushing during the recency check.
+
+[1]: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/
+
+Link: https://lkml.kernel.org/r/20240627201737.3506959-1-nphamcs@gmail.com
+Fixes: b00684722262 ("mm: workingset: move the stats flush into workingset_test_recent()")
+Signed-off-by: Nhat Pham <nphamcs@gmail.com>
+Reported-by: syzbot+b7f13b2d0cc156edf61a@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/
+Debugged-by: Johannes Weiner <hannes@cmpxchg.org>
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Kairui Song <kasong@tencent.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: <stable@vger.kernel.org>   [6.8+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/swap.h |    3 ++-
+ mm/filemap.c         |    5 ++++-
+ mm/workingset.c      |   14 +++++++++++---
+ 3 files changed, 17 insertions(+), 5 deletions(-)
+
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -344,7 +344,8 @@ static inline swp_entry_t page_swap_entr
+ }
+ /* linux/mm/workingset.c */
+-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
++bool workingset_test_recent(void *shadow, bool file, bool *workingset,
++                              bool flush);
+ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
+ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
+ void workingset_refault(struct folio *folio, void *shadow);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -4153,6 +4153,9 @@ static void filemap_cachestat(struct add
+       XA_STATE(xas, &mapping->i_pages, first_index);
+       struct folio *folio;
++      /* Flush stats (and potentially sleep) outside the RCU read section. */
++      mem_cgroup_flush_stats_ratelimited(NULL);
++
+       rcu_read_lock();
+       xas_for_each(&xas, folio, last_index) {
+               int order;
+@@ -4216,7 +4219,7 @@ static void filemap_cachestat(struct add
+                                       goto resched;
+                       }
+ #endif
+-                      if (workingset_test_recent(shadow, true, &workingset))
++                      if (workingset_test_recent(shadow, true, &workingset, false))
+                               cs->nr_recently_evicted += nr_pages;
+                       goto resched;
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *
+  * @file: whether the corresponding folio is from the file lru.
+  * @workingset: where the workingset value unpacked from shadow should
+  * be stored.
++ * @flush: whether to flush cgroup rstat.
+  *
+  * Return: true if the shadow is for a recently evicted folio; false otherwise.
+  */
+-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
++bool workingset_test_recent(void *shadow, bool file, bool *workingset,
++                              bool flush)
+ {
+       struct mem_cgroup *eviction_memcg;
+       struct lruvec *eviction_lruvec;
+@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow
+       /*
+        * Flush stats (and potentially sleep) outside the RCU read section.
++       *
++       * Note that workingset_test_recent() itself might be called in RCU read
++       * section (for e.g, in cachestat) - these callers need to skip flushing
++       * stats (via the flush argument).
++       *
+        * XXX: With per-memcg flushing and thresholding, is ratelimiting
+        * still needed here?
+        */
+-      mem_cgroup_flush_stats_ratelimited(eviction_memcg);
++      if (flush)
++              mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+       eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
+       refault = atomic_long_read(&eviction_lruvec->nonresident_age);
+@@ -558,7 +566,7 @@ void workingset_refault(struct folio *fo
+       mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
+-      if (!workingset_test_recent(shadow, file, &workingset))
++      if (!workingset_test_recent(shadow, file, &workingset, true))
+               return;
+       folio_set_active(folio);
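
For illustration, a minimal user-space sketch of the pattern the patch above applies: the sleep-prone flush is hoisted out of the read section, and the callee grows a flag so callers already inside an RCU read section can skip it. All helpers here are stand-ins, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel primitives involved (illustration only). */
static void flush_stats(void)     { puts("flush stats (may sleep)"); }
static void rcu_read_lock(void)   { puts("rcu_read_lock()"); }
static void rcu_read_unlock(void) { puts("rcu_read_unlock()"); }

/* Like workingset_test_recent() after the patch: callers already inside
 * an RCU read section pass flush=false and skip the sleepable flush. */
static bool test_recent(bool flush)
{
	if (flush)
		flush_stats();
	return true;
}

int main(void)
{
	flush_stats();		/* hoisted: sleep-prone work done up front */
	rcu_read_lock();
	test_recent(false);	/* no flushing inside the read section */
	rcu_read_unlock();
	return 0;
}
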
diff --git a/queue-6.9/filemap-replace-pte_offset_map-with-pte_offset_map_nolock.patch b/queue-6.9/filemap-replace-pte_offset_map-with-pte_offset_map_nolock.patch
new file mode 100644 (file)
index 0000000..a781e80
--- /dev/null
@@ -0,0 +1,53 @@
+From 24be02a42181f0707be0498045c4c4b13273b16d Mon Sep 17 00:00:00 2001
+From: ZhangPeng <zhangpeng362@huawei.com>
+Date: Wed, 13 Mar 2024 09:29:13 +0800
+Subject: filemap: replace pte_offset_map() with pte_offset_map_nolock()
+
+From: ZhangPeng <zhangpeng362@huawei.com>
+
+commit 24be02a42181f0707be0498045c4c4b13273b16d upstream.
+
+The vmf->ptl in filemap_fault_recheck_pte_none() is still set from
+handle_pte_fault().  But at the same time, we did a pte_unmap(vmf->pte).
+After pte_unmap(vmf->pte) and rcu_read_unlock(), the page table may be
+racily changed and vmf->ptl may fail to protect the actual page
+table.  Fix this by replacing pte_offset_map() with
+pte_offset_map_nolock().
+
+As David said, the PTL pointer might be stale, so if we continue to use
+it in filemap_fault_recheck_pte_none(), it might trigger UAF.  Also, if
+the PTL fails, the issue fixed by commit 58f327f2ce80 ("filemap: avoid
+unnecessary major faults in filemap_fault()") might reappear.
+
+Link: https://lkml.kernel.org/r/20240313012913.2395414-1-zhangpeng362@huawei.com
+Fixes: 58f327f2ce80 ("filemap: avoid unnecessary major faults in filemap_fault()")
+Signed-off-by: ZhangPeng <zhangpeng362@huawei.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Nanyong Sun <sunnanyong@huawei.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3207,7 +3207,8 @@ static vm_fault_t filemap_fault_recheck_
+       if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
+               return 0;
+-      ptep = pte_offset_map(vmf->pmd, vmf->address);
++      ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
++                                   &vmf->ptl);
+       if (unlikely(!ptep))
+               return VM_FAULT_NOPAGE;
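
The essence of pte_offset_map_nolock() over pte_offset_map() is that the lookup hands back the lock that currently guards the mapped table together with the mapping itself, instead of letting the caller trust a possibly stale cached lock pointer. A hedged user-space analog of that contract (all names below are illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>

struct table {
	pthread_mutex_t lock;	/* the "PTL" guarding the entries */
	int entry;
};

/* Analog of pte_offset_map_nolock(): return the entry and, through
 * lockp, the lock that currently covers it, so the caller never uses
 * a lock pointer cached before the table could have changed. */
static int *lookup_nolock(struct table *t, pthread_mutex_t **lockp)
{
	*lockp = &t->lock;
	return &t->entry;
}

int main(void)
{
	struct table t = { PTHREAD_MUTEX_INITIALIZER, 42 };
	pthread_mutex_t *ptl;
	int *ptep = lookup_nolock(&t, &ptl);

	pthread_mutex_lock(ptl);	/* lock known to match this mapping */
	printf("entry = %d\n", *ptep);
	pthread_mutex_unlock(ptl);
	return 0;
}
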
diff --git a/queue-6.9/ksmbd-discard-write-access-to-the-directory-open.patch b/queue-6.9/ksmbd-discard-write-access-to-the-directory-open.patch
new file mode 100644 (file)
index 0000000..b9f2075
--- /dev/null
@@ -0,0 +1,80 @@
+From e2e33caa5dc2eae7bddf88b22ce11ec3d760e5cd Mon Sep 17 00:00:00 2001
+From: Hobin Woo <hobin.woo@samsung.com>
+Date: Fri, 5 Jul 2024 12:27:25 +0900
+Subject: ksmbd: discard write access to the directory open
+
+From: Hobin Woo <hobin.woo@samsung.com>
+
+commit e2e33caa5dc2eae7bddf88b22ce11ec3d760e5cd upstream.
+
+may_open() does not allow a directory to be opened with write access.
+However, some write flags set by the client result in write access being
+added on the server, making ksmbd incompatible with the FUSE file
+system.  Simply, let's discard the write access when opening a directory.
+
+list_add corruption. next is NULL.
+------------[ cut here ]------------
+kernel BUG at lib/list_debug.c:26!
+pc : __list_add_valid+0x88/0xbc
+lr : __list_add_valid+0x88/0xbc
+Call trace:
+__list_add_valid+0x88/0xbc
+fuse_finish_open+0x11c/0x170
+fuse_open_common+0x284/0x5e8
+fuse_dir_open+0x14/0x24
+do_dentry_open+0x2a4/0x4e0
+dentry_open+0x50/0x80
+smb2_open+0xbe4/0x15a4
+handle_ksmbd_work+0x478/0x5ec
+process_one_work+0x1b4/0x448
+worker_thread+0x25c/0x430
+kthread+0x104/0x1d4
+ret_from_fork+0x10/0x20
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yoonho Shin <yoonho.shin@samsung.com>
+Signed-off-by: Hobin Woo <hobin.woo@samsung.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/smb2pdu.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/fs/smb/server/smb2pdu.c
++++ b/fs/smb/server/smb2pdu.c
+@@ -2051,15 +2051,22 @@ out_err1:
+  * @access:           file access flags
+  * @disposition:      file disposition flags
+  * @may_flags:                set with MAY_ flags
++ * @is_dir:           is creating open flags for directory
+  *
+  * Return:      file open flags
+  */
+ static int smb2_create_open_flags(bool file_present, __le32 access,
+                                 __le32 disposition,
+-                                int *may_flags)
++                                int *may_flags,
++                                bool is_dir)
+ {
+       int oflags = O_NONBLOCK | O_LARGEFILE;
++      if (is_dir) {
++              access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
++              ksmbd_debug(SMB, "Discard write access to a directory\n");
++      }
++
+       if (access & FILE_READ_DESIRED_ACCESS_LE &&
+           access & FILE_WRITE_DESIRE_ACCESS_LE) {
+               oflags |= O_RDWR;
+@@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work)
+       open_flags = smb2_create_open_flags(file_present, daccess,
+                                           req->CreateDisposition,
+-                                          &may_flags);
++                                          &may_flags,
++              req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
++              (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
+       if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+               if (open_flags & (O_CREAT | O_TRUNC)) {
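
A compact user-space sketch of the access-to-open-flags translation after this change; the access-bit names and values here are simplified stand-ins for the SMB2 constants, not the real definitions:

#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the SMB2 desired-access bits. */
#define READ_ACCESS	0x1u
#define WRITE_ACCESS	0x2u

/* Analog of smb2_create_open_flags() after the patch: for a directory,
 * strip the write bits first so the resulting open flags never carry
 * O_WRONLY/O_RDWR, which may_open() rejects for directories. */
static int create_open_flags(unsigned int access, bool is_dir)
{
	int oflags = O_NONBLOCK;

	if (is_dir)
		access &= ~WRITE_ACCESS;

	if ((access & READ_ACCESS) && (access & WRITE_ACCESS))
		oflags |= O_RDWR;
	else if (access & WRITE_ACCESS)
		oflags |= O_WRONLY;
	else
		oflags |= O_RDONLY;
	return oflags;
}

int main(void)
{
	int fl = create_open_flags(READ_ACCESS | WRITE_ACCESS, true);

	printf("directory open requests write? %s\n",
	       (fl & (O_WRONLY | O_RDWR)) ? "yes" : "no");
	return 0;
}
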
diff --git a/queue-6.9/mm-filemap-make-max_pagecache_order-acceptable-to-xarray.patch b/queue-6.9/mm-filemap-make-max_pagecache_order-acceptable-to-xarray.patch
new file mode 100644 (file)
index 0000000..7c35630
--- /dev/null
@@ -0,0 +1,261 @@
+From 099d90642a711caae377f53309abfe27e8724a8b Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gshan@redhat.com>
+Date: Thu, 27 Jun 2024 10:39:49 +1000
+Subject: mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray
+
+From: Gavin Shan <gshan@redhat.com>
+
+commit 099d90642a711caae377f53309abfe27e8724a8b upstream.
+
+Patch series "mm/filemap: Limit page cache size to that supported by
+xarray", v2.
+
+Currently, xarray can't support an arbitrary page cache size.  More
+details can be found in the WARN_ON() statement in xas_split_alloc().
+In our test, whose code is attached below, we hit the WARN_ON() on an
+ARM64 system where the base page size is 64KB and the huge page size is
+512MB.  The issue was reported a long time ago and some discussions on
+it can be found here [1].
+
+[1] https://www.spinics.net/lists/linux-xfs/msg75404.html
+
+In order to fix the issue, we need to adjust MAX_PAGECACHE_ORDER to one
+supported by xarray and avoid PMD-sized page cache if needed.  The code
+changes are suggested by David Hildenbrand.
+
+PATCH[1] adjusts MAX_PAGECACHE_ORDER to that supported by xarray
+PATCH[2-3] avoids PMD-sized page cache in the synchronous readahead path
+PATCH[4] avoids PMD-sized page cache for shmem files if needed
+
+Test program
+============
+# cat test.c
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+
+#define TEST_XFS_FILENAME      "/tmp/data"
+#define TEST_SHMEM_FILENAME    "/dev/shm/data"
+#define TEST_MEM_SIZE          0x20000000
+
+int main(int argc, char **argv)
+{
+       const char *filename;
+       int fd = 0;
+       void *buf = (void *)-1, *p;
+       int pgsize = getpagesize();
+       int ret;
+
+       if (pgsize != 0x10000) {
+               fprintf(stderr, "64KB base page size is required\n");
+               return -EPERM;
+       }
+
+       system("echo force > /sys/kernel/mm/transparent_hugepage/shmem_enabled");
+       system("rm -fr /tmp/data");
+       system("rm -fr /dev/shm/data");
+       system("echo 1 > /proc/sys/vm/drop_caches");
+
+       /* Open xfs or shmem file */
+       filename = TEST_XFS_FILENAME;
+       if (argc > 1 && !strcmp(argv[1], "shmem"))
+               filename = TEST_SHMEM_FILENAME;
+
+       fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0600);
+       if (fd < 0) {
+               fprintf(stderr, "Unable to open <%s>\n", filename);
+               return -EIO;
+       }
+
+       /* Extend file size */
+       ret = ftruncate(fd, TEST_MEM_SIZE);
+       if (ret) {
+               fprintf(stderr, "Error %d to ftruncate()\n", ret);
+               goto cleanup;
+       }
+
+       /* Create VMA */
+       buf = mmap(NULL, TEST_MEM_SIZE,
+                  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+       if (buf == (void *)-1) {
+               fprintf(stderr, "Unable to mmap <%s>\n", filename);
+               goto cleanup;
+       }
+
+       fprintf(stdout, "mapped buffer at 0x%p\n", buf);
+       ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE);
+        if (ret) {
+               fprintf(stderr, "Unable to madvise(MADV_HUGEPAGE)\n");
+               goto cleanup;
+       }
+
+       /* Populate VMA */
+       ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_WRITE);
+       if (ret) {
+               fprintf(stderr, "Error %d to madvise(MADV_POPULATE_WRITE)\n", ret);
+               goto cleanup;
+       }
+
+       /* Punch the file to enforce xarray split */
+       ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                       TEST_MEM_SIZE - pgsize, pgsize);
+       if (ret)
+               fprintf(stderr, "Error %d to fallocate()\n", ret);
+
+cleanup:
+       if (buf != (void *)-1)
+               munmap(buf, TEST_MEM_SIZE);
+       if (fd > 0)
+               close(fd);
+
+       return 0;
+}
+
+# gcc test.c -o test
+# cat /proc/1/smaps | grep KernelPageSize | head -n 1
+KernelPageSize:       64 kB
+# ./test shmem
+   :
+------------[ cut here ]------------
+WARNING: CPU: 17 PID: 5253 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
+Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib  \
+nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct    \
+nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4    \
+ip_set nf_tables rfkill nfnetlink vfat fat virtio_balloon          \
+drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64  \
+virtio_net sha1_ce net_failover failover virtio_console virtio_blk \
+dimlib virtio_mmio
+CPU: 17 PID: 5253 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #12
+Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
+pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+pc : xas_split_alloc+0xf8/0x128
+lr : split_huge_page_to_list_to_order+0x1c4/0x720
+sp : ffff80008a92f5b0
+x29: ffff80008a92f5b0 x28: ffff80008a92f610 x27: ffff80008a92f728
+x26: 0000000000000cc0 x25: 000000000000000d x24: ffff0000cf00c858
+x23: ffff80008a92f610 x22: ffffffdfc0600000 x21: 0000000000000000
+x20: 0000000000000000 x19: ffffffdfc0600000 x18: 0000000000000000
+x17: 0000000000000000 x16: 0000018000000000 x15: 3374004000000000
+x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020
+x11: 3374000000000000 x10: 3374e1c0ffff6000 x9 : ffffb463a84c681c
+x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff00011c976ce0
+x5 : ffffb463aa47e378 x4 : 0000000000000000 x3 : 0000000000000cc0
+x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
+Call trace:
+ xas_split_alloc+0xf8/0x128
+ split_huge_page_to_list_to_order+0x1c4/0x720
+ truncate_inode_partial_folio+0xdc/0x160
+ shmem_undo_range+0x2bc/0x6a8
+ shmem_fallocate+0x134/0x430
+ vfs_fallocate+0x124/0x2e8
+ ksys_fallocate+0x4c/0xa0
+ __arm64_sys_fallocate+0x24/0x38
+ invoke_syscall.constprop.0+0x7c/0xd8
+ do_el0_svc+0xb4/0xd0
+ el0_svc+0x44/0x1d8
+ el0t_64_sync_handler+0x134/0x150
+ el0t_64_sync+0x17c/0x180
+
+
+This patch (of 4):
+
+The largest page cache order can be HPAGE_PMD_ORDER (13) on ARM64 with
+64KB base page size.  An xarray entry with this order can't be split, as
+the following error messages indicate.
+
+------------[ cut here ]------------
+WARNING: CPU: 35 PID: 7484 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
+Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib  \
+nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct    \
+nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4    \
+ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm      \
+fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64      \
+sha1_ce virtio_net net_failover virtio_console virtio_blk failover \
+dimlib virtio_mmio
+CPU: 35 PID: 7484 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #9
+Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
+pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+pc : xas_split_alloc+0xf8/0x128
+lr : split_huge_page_to_list_to_order+0x1c4/0x720
+sp : ffff800087a4f6c0
+x29: ffff800087a4f6c0 x28: ffff800087a4f720 x27: 000000001fffffff
+x26: 0000000000000c40 x25: 000000000000000d x24: ffff00010625b858
+x23: ffff800087a4f720 x22: ffffffdfc0780000 x21: 0000000000000000
+x20: 0000000000000000 x19: ffffffdfc0780000 x18: 000000001ff40000
+x17: 00000000ffffffff x16: 0000018000000000 x15: 51ec004000000000
+x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020
+x11: 51ec000000000000 x10: 51ece1c0ffff8000 x9 : ffffbeb961a44d28
+x8 : 0000000000000003 x7 : ffffffdfc0456420 x6 : ffff0000e1aa6eb8
+x5 : 20bf08b4fe778fca x4 : ffffffdfc0456420 x3 : 0000000000000c40
+x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
+Call trace:
+ xas_split_alloc+0xf8/0x128
+ split_huge_page_to_list_to_order+0x1c4/0x720
+ truncate_inode_partial_folio+0xdc/0x160
+ truncate_inode_pages_range+0x1b4/0x4a8
+ truncate_pagecache_range+0x84/0xa0
+ xfs_flush_unmap_range+0x70/0x90 [xfs]
+ xfs_file_fallocate+0xfc/0x4d8 [xfs]
+ vfs_fallocate+0x124/0x2e8
+ ksys_fallocate+0x4c/0xa0
+ __arm64_sys_fallocate+0x24/0x38
+ invoke_syscall.constprop.0+0x7c/0xd8
+ do_el0_svc+0xb4/0xd0
+ el0_svc+0x44/0x1d8
+ el0t_64_sync_handler+0x134/0x150
+ el0t_64_sync+0x17c/0x180
+
+Fix it by decreasing MAX_PAGECACHE_ORDER to the largest order supported
+by xarray.  For this specific case, MAX_PAGECACHE_ORDER is dropped from
+13 to 11 when CONFIG_BASE_SMALL is disabled.
+
+Link: https://lkml.kernel.org/r/20240627003953.1262512-1-gshan@redhat.com
+Link: https://lkml.kernel.org/r/20240627003953.1262512-2-gshan@redhat.com
+Fixes: 793917d997df ("mm/readahead: Add large folio readahead")
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Darrick J. Wong <djwong@kernel.org>
+Cc: Don Dutile <ddutile@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Zhenyu Zhang <zhenyzha@redhat.com>
+Cc: <stable@vger.kernel.org>   [5.18+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pagemap.h |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -352,11 +352,18 @@ static inline void mapping_set_gfp_mask(
+  * a good order (that's 1MB if you're using 4kB pages)
+  */
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-#define MAX_PAGECACHE_ORDER   HPAGE_PMD_ORDER
++#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+ #else
+-#define MAX_PAGECACHE_ORDER   8
++#define PREFERRED_MAX_PAGECACHE_ORDER 8
+ #endif
++/*
++ * xas_split_alloc() does not support arbitrary orders. This implies no
++ * 512MB THP on ARM64 with 64KB base page size.
++ */
++#define MAX_XAS_ORDER         (XA_CHUNK_SHIFT * 2 - 1)
++#define MAX_PAGECACHE_ORDER   min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
++
+ /**
+  * mapping_set_large_folios() - Indicate the file supports large folios.
+  * @mapping: The file.
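
The new cap is plain arithmetic: with CONFIG_BASE_SMALL disabled, XA_CHUNK_SHIFT is 6, so MAX_XAS_ORDER is 2 * 6 - 1 = 11, and min(11, 13) trims the ARM64/64K preference of HPAGE_PMD_ORDER = 13 down to 11, matching the commit message. A tiny standalone check of those numbers:

#include <stdio.h>

int main(void)
{
	int xa_chunk_shift = 6;		/* CONFIG_BASE_SMALL disabled */
	int preferred = 13;		/* HPAGE_PMD_ORDER, arm64/64K pages */
	int max_xas_order = xa_chunk_shift * 2 - 1;	/* 11 */
	int max_pagecache_order =
		max_xas_order < preferred ? max_xas_order : preferred;

	printf("MAX_PAGECACHE_ORDER drops from %d to %d\n",
	       preferred, max_pagecache_order);
	return 0;
}
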
diff --git a/queue-6.9/mm-filemap-skip-to-create-pmd-sized-page-cache-if-needed.patch b/queue-6.9/mm-filemap-skip-to-create-pmd-sized-page-cache-if-needed.patch
new file mode 100644 (file)
index 0000000..b2b0685
--- /dev/null
@@ -0,0 +1,91 @@
+From 3390916aca7af1893ed2ebcdfee1d6fdb65bb058 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gshan@redhat.com>
+Date: Thu, 27 Jun 2024 10:39:51 +1000
+Subject: mm/filemap: skip to create PMD-sized page cache if needed
+
+From: Gavin Shan <gshan@redhat.com>
+
+commit 3390916aca7af1893ed2ebcdfee1d6fdb65bb058 upstream.
+
+On ARM64, HPAGE_PMD_ORDER is 13 when the base page size is 64KB.  The
+PMD-sized page cache can't be supported by xarray as the following error
+messages indicate.
+
+------------[ cut here ]------------
+WARNING: CPU: 35 PID: 7484 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
+Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib  \
+nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct    \
+nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4    \
+ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm      \
+fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64      \
+sha1_ce virtio_net net_failover virtio_console virtio_blk failover \
+dimlib virtio_mmio
+CPU: 35 PID: 7484 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #9
+Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
+pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+pc : xas_split_alloc+0xf8/0x128
+lr : split_huge_page_to_list_to_order+0x1c4/0x720
+sp : ffff800087a4f6c0
+x29: ffff800087a4f6c0 x28: ffff800087a4f720 x27: 000000001fffffff
+x26: 0000000000000c40 x25: 000000000000000d x24: ffff00010625b858
+x23: ffff800087a4f720 x22: ffffffdfc0780000 x21: 0000000000000000
+x20: 0000000000000000 x19: ffffffdfc0780000 x18: 000000001ff40000
+x17: 00000000ffffffff x16: 0000018000000000 x15: 51ec004000000000
+x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020
+x11: 51ec000000000000 x10: 51ece1c0ffff8000 x9 : ffffbeb961a44d28
+x8 : 0000000000000003 x7 : ffffffdfc0456420 x6 : ffff0000e1aa6eb8
+x5 : 20bf08b4fe778fca x4 : ffffffdfc0456420 x3 : 0000000000000c40
+x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
+Call trace:
+ xas_split_alloc+0xf8/0x128
+ split_huge_page_to_list_to_order+0x1c4/0x720
+ truncate_inode_partial_folio+0xdc/0x160
+ truncate_inode_pages_range+0x1b4/0x4a8
+ truncate_pagecache_range+0x84/0xa0
+ xfs_flush_unmap_range+0x70/0x90 [xfs]
+ xfs_file_fallocate+0xfc/0x4d8 [xfs]
+ vfs_fallocate+0x124/0x2e8
+ ksys_fallocate+0x4c/0xa0
+ __arm64_sys_fallocate+0x24/0x38
+ invoke_syscall.constprop.0+0x7c/0xd8
+ do_el0_svc+0xb4/0xd0
+ el0_svc+0x44/0x1d8
+ el0t_64_sync_handler+0x134/0x150
+ el0t_64_sync+0x17c/0x180
+
+Fix it by skipping the creation of PMD-sized page cache when its order
+is larger than MAX_PAGECACHE_ORDER.  In this specific case, we fall back
+to the regular path, where the readahead window is determined by BDI's
+sysfs file (read_ahead_kb).
+
+Link: https://lkml.kernel.org/r/20240627003953.1262512-4-gshan@redhat.com
+Fixes: 4687fdbb805a ("mm/filemap: Support VM_HUGEPAGE for file mappings")
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Darrick J. Wong <djwong@kernel.org>
+Cc: Don Dutile <ddutile@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Zhenyu Zhang <zhenyzha@redhat.com>
+Cc: <stable@vger.kernel.org>   [5.18+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3100,7 +3100,7 @@ static struct file *do_sync_mmap_readahe
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       /* Use the readahead code, even if readahead is disabled */
+-      if (vm_flags & VM_HUGEPAGE) {
++      if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+               ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+               ra->size = HPAGE_PMD_NR;
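
With the values from the commit message (assumed below: order 13 preferred, order 11 cap), the patched condition in do_sync_mmap_readahead() reduces to a simple order comparison; a minimal sketch:

#include <stdio.h>

#define MAX_PAGECACHE_ORDER	11	/* xarray-imposed cap (arm64/64K) */
#define HPAGE_PMD_ORDER		13	/* PMD order on arm64/64K pages */

int main(void)
{
	/* Mirror of the patched guard in do_sync_mmap_readahead(). */
	if (HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER)
		puts("PMD-sized (VM_HUGEPAGE) readahead");
	else
		puts("fall back to the read_ahead_kb-sized window");
	return 0;
}
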
diff --git a/queue-6.9/mm-fix-crashes-from-deferred-split-racing-folio-migration.patch b/queue-6.9/mm-fix-crashes-from-deferred-split-racing-folio-migration.patch
new file mode 100644 (file)
index 0000000..6a14393
--- /dev/null
@@ -0,0 +1,99 @@
+From be9581ea8c058d81154251cb0695987098996cad Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 2 Jul 2024 00:40:55 -0700
+Subject: mm: fix crashes from deferred split racing folio migration
+
+From: Hugh Dickins <hughd@google.com>
+
+commit be9581ea8c058d81154251cb0695987098996cad upstream.
+
+Even on 6.10-rc6, I've been seeing elusive "Bad page state"s (often on
+flags when freeing, yet the flags shown are not bad: PG_locked had been
+set and cleared??), and VM_BUG_ON_PAGE(page_ref_count(page) == 0)s from
+deferred_split_scan()'s folio_put(), and a variety of other BUG and WARN
+symptoms implying double free by deferred split and large folio migration.
+
+6.7 commit 9bcef5973e31 ("mm: memcg: fix split queue list crash when large
+folio migration") was right to fix the memcg-dependent locking broken in
+85ce2c517ade ("memcontrol: only transfer the memcg data for migration"),
+but missed a subtlety of deferred_split_scan(): it moves folios to its own
+local list to work on them without split_queue_lock, during which time
+folio->_deferred_list is not empty, but even the "right" lock does nothing
+to secure the folio and the list it is on.
+
+Fortunately, deferred_split_scan() is careful to use folio_try_get(): so
+folio_migrate_mapping() can avoid the race by folio_undo_large_rmappable()
+while the old folio's reference count is temporarily frozen to 0 - adding
+such a freeze in the !mapping case too (originally, folio lock and
+unmapping and no swap cache left an anon folio unreachable, so no freezing
+was needed there: but the deferred split queue offers a way to reach it).
+
+Link: https://lkml.kernel.org/r/29c83d1a-11ca-b6c9-f92e-6ccb322af510@google.com
+Fixes: 9bcef5973e31 ("mm: memcg: fix split queue list crash when large folio migration")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Nhat Pham <nphamcs@gmail.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c |   11 -----------
+ mm/migrate.c    |   13 +++++++++++++
+ 2 files changed, 13 insertions(+), 11 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -7609,17 +7609,6 @@ void mem_cgroup_migrate(struct folio *ol
+       /* Transfer the charge and the css ref */
+       commit_charge(new, memcg);
+-      /*
+-       * If the old folio is a large folio and is in the split queue, it needs
+-       * to be removed from the split queue now, in case getting an incorrect
+-       * split queue in destroy_large_folio() after the memcg of the old folio
+-       * is cleared.
+-       *
+-       * In addition, the old folio is about to be freed after migration, so
+-       * removing from the split queue a bit earlier seems reasonable.
+-       */
+-      if (folio_test_large(old) && folio_test_large_rmappable(old))
+-              folio_undo_large_rmappable(old);
+       old->memcg_data = 0;
+ }
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address
+               if (folio_ref_count(folio) != expected_count)
+                       return -EAGAIN;
++              /* Take off deferred split queue while frozen and memcg set */
++              if (folio_test_large(folio) &&
++                  folio_test_large_rmappable(folio)) {
++                      if (!folio_ref_freeze(folio, expected_count))
++                              return -EAGAIN;
++                      folio_undo_large_rmappable(folio);
++                      folio_ref_unfreeze(folio, expected_count);
++              }
++
+               /* No turning back from here */
+               newfolio->index = folio->index;
+               newfolio->mapping = folio->mapping;
+@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address
+               return -EAGAIN;
+       }
++      /* Take off deferred split queue while frozen and memcg set */
++      if (folio_test_large(folio) && folio_test_large_rmappable(folio))
++              folio_undo_large_rmappable(folio);
++
+       /*
+        * Now we know that no one else is looking at the folio:
+        * no turning back from here.
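
The key trick is freezing the reference count to 0 so a concurrent folio_try_get() (which refuses zero-count folios) cannot take a reference while the folio is unhooked from the deferred split queue. A minimal C11-atomics analog of that freeze/unfreeze window, under the assumption that all readers use a try-get that fails on zero:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Analogous to folio_ref_freeze()/folio_ref_unfreeze(): the count is
 * briefly forced to 0, so a try-get that refuses zero-count objects
 * cannot obtain a new reference mid-update. */
static bool ref_freeze(atomic_int *ref, int expected)
{
	return atomic_compare_exchange_strong(ref, &expected, 0);
}

static void ref_unfreeze(atomic_int *ref, int count)
{
	atomic_store(ref, count);
}

int main(void)
{
	atomic_int ref = 1;

	if (ref_freeze(&ref, 1)) {
		puts("frozen: safe to unhook from shared lists");
		ref_unfreeze(&ref, 1);
	}
	return 0;
}
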
diff --git a/queue-6.9/mm-vmalloc-check-if-a-hash-index-is-in-cpu_possible_mask.patch b/queue-6.9/mm-vmalloc-check-if-a-hash-index-is-in-cpu_possible_mask.patch
new file mode 100644 (file)
index 0000000..7a349e5
--- /dev/null
@@ -0,0 +1,53 @@
+From a34acf30b19bc4ee3ba2f1082756ea2604c19138 Mon Sep 17 00:00:00 2001
+From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Date: Wed, 26 Jun 2024 16:03:30 +0200
+Subject: mm: vmalloc: check if a hash-index is in cpu_possible_mask
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+commit a34acf30b19bc4ee3ba2f1082756ea2604c19138 upstream.
+
+The problem is that there are systems where cpu_possible_mask has gaps
+between set CPUs, for example SPARC.  In this scenario the addr_to_vb_xa()
+hash function can return an index that accesses a not-possible and
+not-set-up CPU area via the per_cpu() macro.  This results in an oops on
+SPARC.
+
+The per-CPU vmap_block_queue is also used as a hash table, incorrectly
+assuming that cpu_possible_mask has no gaps.  Fix it by adjusting the
+index to the next possible CPU.
+
+Link: https://lkml.kernel.org/r/20240626140330.89836-1-urezki@gmail.com
+Fixes: 062eacf57ad9 ("mm: vmalloc: remove a global vmap_blocks xarray")
+Reported-by: Nick Bowler <nbowler@draconx.ca>
+Closes: https://lore.kernel.org/linux-kernel/ZntjIE6msJbF8zTa@MiWiFi-R3L-srv/T/
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Hailong.Liu <hailong.liu@oppo.com>
+Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2519,7 +2519,15 @@ static DEFINE_PER_CPU(struct vmap_block_
+ static struct xarray *
+ addr_to_vb_xa(unsigned long addr)
+ {
+-      int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
++      int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
++
++      /*
++       * Please note, nr_cpu_ids points on a highest set
++       * possible bit, i.e. we never invoke cpumask_next()
++       * if an index points on it which is nr_cpu_ids - 1.
++       */
++      if (!cpu_possible(index))
++              index = cpumask_next(index, cpu_possible_mask);
+       return &per_cpu(vmap_block_queue, index).vmap_blocks;
+ }
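
A small user-space model of the fixed hashing scheme: a possible-CPU mask with gaps (hypothetical CPUs 0, 2 and 5), a modulo-nr_cpu_ids hash, and a fallback to the next set bit when the hashed index is not a possible CPU:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical sparse "possible CPU" mask: only CPUs 0, 2 and 5 exist. */
static const uint64_t possible_mask = (1ull << 0) | (1ull << 2) | (1ull << 5);
static const int nr_cpu_ids = 6;	/* highest possible CPU + 1 */

/* Analog of cpumask_next(): first set bit strictly after idx.  Never
 * called with idx == nr_cpu_ids - 1, since that bit is always set. */
static int next_possible(int idx)
{
	for (idx++; idx < nr_cpu_ids; idx++)
		if (possible_mask & (1ull << idx))
			return idx;
	return nr_cpu_ids;
}

int main(void)
{
	for (unsigned long addr = 0; addr < 8; addr++) {
		int index = (int)(addr % nr_cpu_ids);	/* the hash step */

		if (!(possible_mask & (1ull << index)))
			index = next_possible(index);
		printf("bucket for addr %lu -> cpu %d\n", addr, index);
	}
	return 0;
}
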
diff --git a/queue-6.9/nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch b/queue-6.9/nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch
new file mode 100644 (file)
index 0000000..d1e557b
--- /dev/null
@@ -0,0 +1,78 @@
+From a9e1ddc09ca55746079cc479aa3eb6411f0d99d4 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sat, 29 Jun 2024 01:51:07 +0900
+Subject: nilfs2: fix kernel bug on rename operation of broken directory
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit a9e1ddc09ca55746079cc479aa3eb6411f0d99d4 upstream.
+
+Syzbot reported that, in a rename operation on a broken directory on
+nilfs2, __block_write_begin_int(), called to prepare a block write, may
+fail the BUG_ON check for access exceeding the folio/page size.
+
+This is because nilfs_dotdot(), which gets parent directory reference
+entry ("..") of the directory to be moved or renamed, does not check
+consistency enough, and may return location exceeding folio/page size for
+broken directories.
+
+Fix this issue by checking required directory entries ("." and "..") in
+the first chunk of the directory in nilfs_dotdot().
+
+Link: https://lkml.kernel.org/r/20240628165107.9006-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+d3abed1ad3d367fa2627@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d3abed1ad3d367fa2627
+Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dir.c |   32 ++++++++++++++++++++++++++++++--
+ 1 file changed, 30 insertions(+), 2 deletions(-)
+
+--- a/fs/nilfs2/dir.c
++++ b/fs/nilfs2/dir.c
+@@ -384,11 +384,39 @@ found:
+ struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
+ {
+-      struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
++      struct folio *folio;
++      struct nilfs_dir_entry *de, *next_de;
++      size_t limit;
++      char *msg;
++      de = nilfs_get_folio(dir, 0, &folio);
+       if (IS_ERR(de))
+               return NULL;
+-      return nilfs_next_entry(de);
++
++      limit = nilfs_last_byte(dir, 0);  /* is a multiple of chunk size */
++      if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
++                   !nilfs_match(1, ".", de))) {
++              msg = "missing '.'";
++              goto fail;
++      }
++
++      next_de = nilfs_next_entry(de);
++      /*
++       * If "next_de" has not reached the end of the chunk, there is
++       * at least one more record.  Check whether it matches "..".
++       */
++      if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
++                   !nilfs_match(2, "..", next_de))) {
++              msg = "missing '..'";
++              goto fail;
++      }
++      *foliop = folio;
++      return next_de;
++
++fail:
++      nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
++      folio_release_kmap(folio, de);
++      return NULL;
+ }
+ ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
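
A hedged user-space analog of the hardened nilfs_dotdot() logic: the first chunk must start with a "." entry pointing at the directory itself, immediately followed by "..", and anything else is reported as corruption. The types and names below are illustrative only:

#include <stdio.h>
#include <string.h>

struct dent { unsigned long ino; const char *name; };

static const struct dent *find_dotdot(const struct dent *chunk,
				      size_t n, unsigned long dir_ino)
{
	const char *msg;

	if (n < 1 || chunk[0].ino != dir_ino || strcmp(chunk[0].name, ".")) {
		msg = "missing '.'";
		goto fail;
	}
	if (n < 2 || strcmp(chunk[1].name, "..")) {
		msg = "missing '..'";
		goto fail;
	}
	return &chunk[1];	/* the ".." entry, as nilfs_dotdot() returns */
fail:
	fprintf(stderr, "directory #%lu %s\n", dir_ino, msg);
	return NULL;
}

int main(void)
{
	struct dent ok[] = { { 7, "." }, { 1, ".." } };
	struct dent bad[] = { { 7, "x" } };

	printf("ok:  %s\n", find_dotdot(ok, 2, 7) ? "valid" : "corrupt");
	printf("bad: %s\n", find_dotdot(bad, 1, 7) ? "valid" : "corrupt");
	return 0;
}
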
diff --git a/queue-6.9/s390-mm-add-null-pointer-check-to-crst_table_free-base_crst_free.patch b/queue-6.9/s390-mm-add-null-pointer-check-to-crst_table_free-base_crst_free.patch
new file mode 100644 (file)
index 0000000..87023e4
--- /dev/null
@@ -0,0 +1,49 @@
+From b5efb63acf7bddaf20eacfcac654c25c446eabe8 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Tue, 9 Jul 2024 08:50:56 +0200
+Subject: s390/mm: Add NULL pointer check to crst_table_free() base_crst_free()
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit b5efb63acf7bddaf20eacfcac654c25c446eabe8 upstream.
+
+crst_table_free() used to work with NULL pointers before the conversion
+to ptdescs.  Since crst_table_free() can be called with a NULL pointer
+(error handling in crst_table_upgrade()), add an explicit check.
+
+Also add the same check to base_crst_free() for consistency reasons.
+
+In real life this should not happen, since order two GFP_KERNEL
+allocations will not fail, unless FAIL_PAGE_ALLOC is enabled and used.
+
+Reported-by: Yunseong Kim <yskelg@gmail.com>
+Fixes: 6326c26c1514 ("s390: convert various pgalloc functions to use ptdescs")
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/pgalloc.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/s390/mm/pgalloc.c
++++ b/arch/s390/mm/pgalloc.c
+@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct m
+ void crst_table_free(struct mm_struct *mm, unsigned long *table)
+ {
++      if (!table)
++              return;
+       pagetable_free(virt_to_ptdesc(table));
+ }
+@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(un
+ static void base_crst_free(unsigned long *table)
+ {
++      if (!table)
++              return;
+       pagetable_free(virt_to_ptdesc(table));
+ }
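
Unlike kfree(), which is a no-op for NULL, pagetable_free(virt_to_ptdesc(table)) cannot tolerate a NULL table, hence the explicit guards. A trivial user-space analog of the resulting NULL-safe free (the function name is a stand-in):

#include <stdlib.h>

/* Mirror of the added guard: tolerate NULL, then hand off to the
 * underlying free routine that does not. */
static void table_free(unsigned long *table)
{
	if (!table)
		return;
	free(table);
}

int main(void)
{
	table_free(NULL);			/* safe no-op, as intended */
	table_free(malloc(4 * sizeof(long)));	/* normal path */
	return 0;
}
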
diff --git a/queue-6.9/scsi-sd-do-not-repeat-the-starting-disk-message.patch b/queue-6.9/scsi-sd-do-not-repeat-the-starting-disk-message.patch
new file mode 100644 (file)
index 0000000..77517da
--- /dev/null
@@ -0,0 +1,69 @@
+From 7a6bbc2829d4ab592c7e440a6f6f5deb3cd95db4 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Tue, 2 Jul 2024 06:53:26 +0900
+Subject: scsi: sd: Do not repeat the starting disk message
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 7a6bbc2829d4ab592c7e440a6f6f5deb3cd95db4 upstream.
+
+The SCSI disk message "Starting disk" to signal resuming of a suspended
+disk is printed in both sd_resume() and sd_resume_common() which results
+in this message being printed twice when resuming from e.g. autosuspend:
+
+$ echo 5000 > /sys/block/sda/device/power/autosuspend_delay_ms
+$ echo auto > /sys/block/sda/device/power/control
+
+[ 4962.438293] sd 0:0:0:0: [sda] Synchronizing SCSI cache
+[ 4962.501121] sd 0:0:0:0: [sda] Stopping disk
+
+$ echo on > /sys/block/sda/device/power/control
+
+[ 4972.805851] sd 0:0:0:0: [sda] Starting disk
+[ 4980.558806] sd 0:0:0:0: [sda] Starting disk
+
+Fix this double print by removing the call to sd_printk() from sd_resume()
+and moving the call to sd_printk() in sd_resume_common() earlier in the
+function, before the check using sd_do_start_stop().  Doing so, the message
+is printed once regardless if sd_resume_common() actually executes
+sd_start_stop_device() (i.e. SCSI device case) or not (libsas and libata
+managed ATA devices case).
+
+Fixes: 0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20240701215326.128067-1-dlemoal@kernel.org
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/sd.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -4127,8 +4127,6 @@ static int sd_resume(struct device *dev)
+ {
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+-      sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+-
+       if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+               sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+               return -EIO;
+@@ -4145,12 +4143,13 @@ static int sd_resume_common(struct devic
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
++      sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
++
+       if (!sd_do_start_stop(sdkp->device, runtime)) {
+               sdkp->suspended = false;
+               return 0;
+       }
+-      sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+       ret = sd_start_stop_device(sdkp, 1);
+       if (!ret) {
+               sd_resume(dev);
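
The shape of the fix, reduced to a sketch: log once in the common resume path, before the early return taken for libata/libsas-managed devices, instead of in two places (all names below are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of sd_resume_common() after the patch (illustration only). */
static int resume_common(bool do_start_stop)
{
	puts("Starting disk");	/* moved up: printed exactly once per resume */
	if (!do_start_stop)
		return 0;	/* libata/libsas manage the device themselves */
	/* ... start the device, then the OPAL unlock would follow ... */
	return 0;
}

int main(void)
{
	resume_common(false);	/* ATA device path */
	resume_common(true);	/* plain SCSI device path */
	return 0;
}
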
diff --git a/queue-6.9/series b/queue-6.9/series
index 6064530efd0b36ee1a10ca13d2ed0a3e3059de97..2334e767390aa884b6bd2c66a1e141689025d353 100644 (file)
@@ -81,3 +81,13 @@ usb-core-fix-duplicate-endpoint-bug-by-clearing-reserved-bits-in-the-descriptor.
 misc-microchip-pci1xxxx-fix-return-value-of-nvmem-callbacks.patch
 hpet-support-32-bit-userspace.patch
 xhci-always-resume-roothubs-if-xhc-was-reset-during-resume.patch
+s390-mm-add-null-pointer-check-to-crst_table_free-base_crst_free.patch
+nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch
+cachestat-do-not-flush-stats-in-recency-check.patch
+mm-vmalloc-check-if-a-hash-index-is-in-cpu_possible_mask.patch
+mm-fix-crashes-from-deferred-split-racing-folio-migration.patch
+filemap-replace-pte_offset_map-with-pte_offset_map_nolock.patch
+mm-filemap-skip-to-create-pmd-sized-page-cache-if-needed.patch
+mm-filemap-make-max_pagecache_order-acceptable-to-xarray.patch
+ksmbd-discard-write-access-to-the-directory-open.patch
+scsi-sd-do-not-repeat-the-starting-disk-message.patch