6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 9 Dec 2023 12:44:08 +0000 (13:44 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 9 Dec 2023 12:44:08 +0000 (13:44 +0100)
added patches:
cgroup_freezer-cgroup_freezing-check-if-not-frozen.patch
checkstack-fix-printed-address.patch
hugetlb-fix-null-ptr-deref-in-hugetlb_vma_lock_write.patch
mm-fix-oops-when-filemap_map_pmd-without-prealloc_pte.patch
nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch
platform-surface-aggregator-fix-recv_buf-return-value.patch
r8169-fix-rtl8125b-pause-frames-blasting-when-suspended.patch
regmap-fix-bogus-error-on-regcache_sync-success.patch
ring-buffer-test-last-update-in-32bit-version-of-__rb_time_read.patch
tracing-always-update-snapshot-buffer-size.patch
tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch
tracing-fix-a-possible-race-when-disabling-buffered-events.patch
tracing-fix-incomplete-locking-when-disabling-buffered-events.patch

16 files changed:
queue-6.1/cgroup_freezer-cgroup_freezing-check-if-not-frozen.patch [new file with mode: 0644]
queue-6.1/checkstack-fix-printed-address.patch [new file with mode: 0644]
queue-6.1/hugetlb-fix-null-ptr-deref-in-hugetlb_vma_lock_write.patch [new file with mode: 0644]
queue-6.1/mm-fix-oops-when-filemap_map_pmd-without-prealloc_pte.patch [new file with mode: 0644]
queue-6.1/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch [new file with mode: 0644]
queue-6.1/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch [new file with mode: 0644]
queue-6.1/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch [new file with mode: 0644]
queue-6.1/platform-surface-aggregator-fix-recv_buf-return-value.patch [new file with mode: 0644]
queue-6.1/r8169-fix-rtl8125b-pause-frames-blasting-when-suspended.patch [new file with mode: 0644]
queue-6.1/regmap-fix-bogus-error-on-regcache_sync-success.patch [new file with mode: 0644]
queue-6.1/ring-buffer-test-last-update-in-32bit-version-of-__rb_time_read.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/tracing-always-update-snapshot-buffer-size.patch [new file with mode: 0644]
queue-6.1/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch [new file with mode: 0644]
queue-6.1/tracing-fix-a-possible-race-when-disabling-buffered-events.patch [new file with mode: 0644]
queue-6.1/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch [new file with mode: 0644]

diff --git a/queue-6.1/cgroup_freezer-cgroup_freezing-check-if-not-frozen.patch b/queue-6.1/cgroup_freezer-cgroup_freezing-check-if-not-frozen.patch
new file mode 100644 (file)
index 0000000..d91972e
--- /dev/null
@@ -0,0 +1,67 @@
+From cff5f49d433fcd0063c8be7dd08fa5bf190c6c37 Mon Sep 17 00:00:00 2001
+From: Tim Van Patten <timvp@google.com>
+Date: Wed, 15 Nov 2023 09:20:43 -0700
+Subject: cgroup_freezer: cgroup_freezing: Check if not frozen
+
+From: Tim Van Patten <timvp@google.com>
+
+commit cff5f49d433fcd0063c8be7dd08fa5bf190c6c37 upstream.
+
+__thaw_task() was recently updated to warn if the task being thawed was
+part of a freezer cgroup that is still currently freezing:
+
+       void __thaw_task(struct task_struct *p)
+       {
+       ...
+               if (WARN_ON_ONCE(freezing(p)))
+                       goto unlock;
+
+This has exposed a bug in cgroup1 freezing where when CGROUP_FROZEN is
+asserted, the CGROUP_FREEZING bits are not also cleared at the same
+time. Meaning, when a cgroup is marked FROZEN it continues to be marked
+FREEZING as well. This causes the WARNING to trigger, because
+cgroup_freezing() thinks the cgroup is still freezing.
+
+There are two ways to fix this:
+
+1. Whenever FROZEN is set, clear FREEZING for the cgroup and all
+children cgroups.
+2. Update cgroup_freezing() to also verify that FROZEN is not set.
+
+This patch implements option (2), since it's smaller and more
+straightforward.
+
+Signed-off-by: Tim Van Patten <timvp@google.com>
+Tested-by: Mark Hasemeyer <markhas@chromium.org>
+Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
+Cc: stable@vger.kernel.org # v6.1+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/legacy_freezer.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
+index 122dacb3a443..66d1708042a7 100644
+--- a/kernel/cgroup/legacy_freezer.c
++++ b/kernel/cgroup/legacy_freezer.c
+@@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer)
+ bool cgroup_freezing(struct task_struct *task)
+ {
+       bool ret;
++      unsigned int state;
+       rcu_read_lock();
+-      ret = task_freezer(task)->state & CGROUP_FREEZING;
++      /* Check if the cgroup is still FREEZING, but not FROZEN. The extra
++       * !FROZEN check is required, because the FREEZING bit is not cleared
++       * when the state FROZEN is reached.
++       */
++      state = task_freezer(task)->state;
++      ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN);
+       rcu_read_unlock();
+       return ret;
+-- 
+2.43.0
+
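
The fix boils down to treating FROZEN as overriding FREEZING. Below is a minimal userspace sketch of that flag logic; the CGROUP_* bit values here are illustrative stand-ins, not the kernel's actual definitions.

```c
#include <assert.h>
#include <stdbool.h>

/* Illustrative flag values; the kernel defines its own CGROUP_* bits. */
#define CGROUP_FREEZING_SELF   (1 << 1)
#define CGROUP_FREEZING_PARENT (1 << 2)
#define CGROUP_FREEZING        (CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT)
#define CGROUP_FROZEN          (1 << 3)

/* Old check: still reports "freezing" after the cgroup reached FROZEN. */
static bool freezing_old(unsigned int state)
{
    return state & CGROUP_FREEZING;
}

/* New check: FROZEN masks FREEZING, as in the patched cgroup_freezing(). */
static bool freezing_new(unsigned int state)
{
    return (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN);
}

int main(void)
{
    /* cgroup1 leaves the FREEZING bits set when it asserts FROZEN... */
    unsigned int state = CGROUP_FREEZING_SELF | CGROUP_FROZEN;

    assert(freezing_old(state));   /* ...so the old check still says "freezing" */
    assert(!freezing_new(state));  /* ...while the new check says "not freezing" */
    return 0;
}
```
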
diff --git a/queue-6.1/checkstack-fix-printed-address.patch b/queue-6.1/checkstack-fix-printed-address.patch
new file mode 100644 (file)
index 0000000..5ba17f2
--- /dev/null
@@ -0,0 +1,60 @@
+From ee34db3f271cea4d4252048617919c2caafe698b Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Mon, 20 Nov 2023 19:37:17 +0100
+Subject: checkstack: fix printed address
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit ee34db3f271cea4d4252048617919c2caafe698b upstream.
+
+All addresses printed by checkstack have an extra incorrect 0 appended at
+the end.
+
+This was introduced with commit 677f1410e058 ("scripts/checkstack.pl: don't
+display $dre as different entity"): since then the address is taken from
+the line which contains the function name, instead of the line which
+contains stack consumption. E.g. on s390:
+
+0000000000100a30 <do_one_initcall>:
+...
+  100a44:       e3 f0 ff 70 ff 71       lay     %r15,-144(%r15)
+
+So the used regex which matches spaces and hexadecimal numbers to extract
+an address now matches a different substring. Subsequently replacing spaces
+with 0 appends a zero at the end, instead of replacing leading spaces.
+
+Fix this by using the proper regex, and simplify the code a bit.
+
+Link: https://lkml.kernel.org/r/20231120183719.2188479-2-hca@linux.ibm.com
+Fixes: 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity")
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Maninder Singh <maninder1.s@samsung.com>
+Cc: Masahiro Yamada <masahiroy@kernel.org>
+Cc: Vaneet Narang <v.narang@samsung.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/checkstack.pl |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/scripts/checkstack.pl
++++ b/scripts/checkstack.pl
+@@ -146,15 +146,11 @@ $total_size = 0;
+ while (my $line = <STDIN>) {
+       if ($line =~ m/$funcre/) {
+               $func = $1;
+-              next if $line !~ m/^($xs*)/;
++              next if $line !~ m/^($x*)/;
+               if ($total_size > $min_stack) {
+                       push @stack, "$intro$total_size\n";
+               }
+-
+-              $addr = $1;
+-              $addr =~ s/ /0/g;
+-              $addr = "0x$addr";
+-
++              $addr = "0x$1";
+               $intro = "$addr $func [$file]:";
+               my $padlen = 56 - length($intro);
+               while ($padlen > 0) {
diff --git a/queue-6.1/hugetlb-fix-null-ptr-deref-in-hugetlb_vma_lock_write.patch b/queue-6.1/hugetlb-fix-null-ptr-deref-in-hugetlb_vma_lock_write.patch
new file mode 100644 (file)
index 0000000..ba09354
--- /dev/null
@@ -0,0 +1,100 @@
+From 187da0f8250aa94bd96266096aef6f694e0b4cd2 Mon Sep 17 00:00:00 2001
+From: Mike Kravetz <mike.kravetz@oracle.com>
+Date: Mon, 13 Nov 2023 17:20:33 -0800
+Subject: hugetlb: fix null-ptr-deref in hugetlb_vma_lock_write
+
+From: Mike Kravetz <mike.kravetz@oracle.com>
+
+commit 187da0f8250aa94bd96266096aef6f694e0b4cd2 upstream.
+
+The routine __vma_private_lock tests for the existence of a reserve map
+associated with a private hugetlb mapping.  A pointer to the reserve map
+is in vma->vm_private_data.  __vma_private_lock was checking the pointer
+for NULL.  However, it is possible that the low bits of the pointer could
+be used as flags.  In such instances, vm_private_data is not NULL and not
+a valid pointer.  This results in the null-ptr-deref reported by syzbot:
+
+general protection fault, probably for non-canonical address 0xdffffc000000001d:
+ 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x00000000000000e8-0x00000000000000ef]
+CPU: 0 PID: 5048 Comm: syz-executor139 Not tainted 6.6.0-rc7-syzkaller-00142-g88
+8cf78c29e2 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 1
+0/09/2023
+RIP: 0010:__lock_acquire+0x109/0x5de0 kernel/locking/lockdep.c:5004
+...
+Call Trace:
+ <TASK>
+ lock_acquire kernel/locking/lockdep.c:5753 [inline]
+ lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5718
+ down_write+0x93/0x200 kernel/locking/rwsem.c:1573
+ hugetlb_vma_lock_write mm/hugetlb.c:300 [inline]
+ hugetlb_vma_lock_write+0xae/0x100 mm/hugetlb.c:291
+ __hugetlb_zap_begin+0x1e9/0x2b0 mm/hugetlb.c:5447
+ hugetlb_zap_begin include/linux/hugetlb.h:258 [inline]
+ unmap_vmas+0x2f4/0x470 mm/memory.c:1733
+ exit_mmap+0x1ad/0xa60 mm/mmap.c:3230
+ __mmput+0x12a/0x4d0 kernel/fork.c:1349
+ mmput+0x62/0x70 kernel/fork.c:1371
+ exit_mm kernel/exit.c:567 [inline]
+ do_exit+0x9ad/0x2a20 kernel/exit.c:861
+ __do_sys_exit kernel/exit.c:991 [inline]
+ __se_sys_exit kernel/exit.c:989 [inline]
+ __x64_sys_exit+0x42/0x50 kernel/exit.c:989
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Mask off low bit flags before checking for NULL pointer.  In addition, the
+reserve map only 'belongs' to the OWNER (parent in parent/child
+relationships) so also check for the OWNER flag.
+
+Link: https://lkml.kernel.org/r/20231114012033.259600-1-mike.kravetz@oracle.com
+Reported-by: syzbot+6ada951e7c0f7bc8a71e@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/linux-mm/00000000000078d1e00608d7878b@google.com/
+Fixes: bf4916922c60 ("hugetlbfs: extend hugetlb_vma_lock to private VMAs")
+Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Rik van Riel <riel@surriel.com>
+Cc: Edward Adam Davis <eadavis@qq.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Tom Rix <trix@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hugetlb.h |    5 +----
+ mm/hugetlb.c            |    7 +++++++
+ 2 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -880,10 +880,7 @@ static inline bool hugepage_migration_su
+       return arch_hugetlb_migration_supported(h);
+ }
+-static inline bool __vma_private_lock(struct vm_area_struct *vma)
+-{
+-      return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
+-}
++bool __vma_private_lock(struct vm_area_struct *vma);
+ /*
+  * Movability check is different as compared to migration check.
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1189,6 +1189,13 @@ static int is_vma_resv_set(struct vm_are
+       return (get_vma_private_data(vma) & flag) != 0;
+ }
++bool __vma_private_lock(struct vm_area_struct *vma)
++{
++      return !(vma->vm_flags & VM_MAYSHARE) &&
++              get_vma_private_data(vma) & ~HPAGE_RESV_MASK &&
++              is_vma_resv_set(vma, HPAGE_RESV_OWNER);
++}
++
+ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
+ {
+       VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
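
The key point is that vm_private_data is a pointer-sized field whose low bits double as flags, so a non-zero value is not necessarily a valid reserve-map pointer. A small standalone sketch of that pattern follows, with hypothetical RESV_* flags standing in for the kernel's HPAGE_RESV_* bits.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical low-bit flags stashed in a pointer-sized field. */
#define RESV_OWNER    ((uintptr_t)0x1)
#define RESV_UNMAPPED ((uintptr_t)0x2)
#define RESV_MASK     (RESV_OWNER | RESV_UNMAPPED)

struct resv_map { int refs; };

/* Old-style check: any non-zero value is mistaken for a valid pointer. */
static bool has_reserve_map_buggy(uintptr_t private_data)
{
    return private_data != 0;
}

/* Fixed check: mask off the flag bits and require the OWNER flag. */
static bool has_reserve_map_fixed(uintptr_t private_data)
{
    return (private_data & ~RESV_MASK) && (private_data & RESV_OWNER);
}

int main(void)
{
    /* A non-owner VMA may carry only flag bits and no reserve map at all. */
    uintptr_t child = RESV_UNMAPPED;
    assert(has_reserve_map_buggy(child));   /* would lead to a bogus dereference */
    assert(!has_reserve_map_fixed(child));  /* correctly treated as "no lock" */

    /* The owner stores a real pointer plus the OWNER flag in the low bit. */
    struct resv_map *map = malloc(sizeof(*map));
    assert(map != NULL);
    uintptr_t owner = (uintptr_t)map | RESV_OWNER;
    assert(has_reserve_map_fixed(owner));

    free(map);
    return 0;
}
```
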
diff --git a/queue-6.1/mm-fix-oops-when-filemap_map_pmd-without-prealloc_pte.patch b/queue-6.1/mm-fix-oops-when-filemap_map_pmd-without-prealloc_pte.patch
new file mode 100644 (file)
index 0000000..2e8482b
--- /dev/null
@@ -0,0 +1,62 @@
+From 9aa1345d66b8132745ffb99b348b1492088da9e2 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 17 Nov 2023 00:49:18 -0800
+Subject: mm: fix oops when filemap_map_pmd() without prealloc_pte
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 9aa1345d66b8132745ffb99b348b1492088da9e2 upstream.
+
+syzbot reports oops in lockdep's __lock_acquire(), called from
+__pte_offset_map_lock() called from filemap_map_pages(); or when I run the
+repro, the oops comes in pmd_install(), called from filemap_map_pmd()
+called from filemap_map_pages(), just before the __pte_offset_map_lock().
+
+The problem is that filemap_map_pmd() has been assuming that when it finds
+pmd_none(), a page table has already been prepared in prealloc_pte; and
+indeed do_fault_around() has been careful to preallocate one there, when
+it finds pmd_none(): but what if *pmd became none in between?
+
+My 6.6 mods in mm/khugepaged.c, avoiding mmap_lock for write, have made it
+easy for *pmd to be cleared while servicing a page fault; but even before
+those, a huge *pmd might be zapped while a fault is serviced.
+
+The difference in symptomatic stack traces comes from the "memory model"
+in use: pmd_install() uses pmd_populate() uses page_to_pfn(): in some
+models that is strict, and will oops on the NULL prealloc_pte; in other
+models, it will construct a bogus value to be populated into *pmd, then
+__pte_offset_map_lock() oops when trying to access split ptlock pointer
+(or some other symptom in normal case of ptlock embedded not pointer).
+
+Link: https://lore.kernel.org/linux-mm/20231115065506.19780-1-jose.pekkarinen@foxhound.fi/
+Link: https://lkml.kernel.org/r/6ed0c50c-78ef-0719-b3c5-60c0c010431c@google.com
+Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reported-and-tested-by: syzbot+89edd67979b52675ddec@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/linux-mm/0000000000005e44550608a0806c@google.com/
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Jann Horn <jannh@google.com>,
+Cc: José Pekkarinen <jose.pekkarinen@foxhound.fi>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>    [5.12+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -3319,7 +3319,7 @@ static bool filemap_map_pmd(struct vm_fa
+               }
+       }
+-      if (pmd_none(*vmf->pmd))
++      if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
+               pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
+       /* See comment in handle_pte_fault() */
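
A minimal sketch of the race the added check guards against (plain pointers standing in for the pmd entry and the preallocated page table; this is not kernel code): the slot can become empty again after the preallocation decision was made, so "slot is empty" and "a table was preallocated" have to be checked together.

```c
#include <assert.h>
#include <stddef.h>

struct slot { void *pmd; };

/* Install a preallocated table only if the slot is still empty AND the
 * table actually exists; the second condition is the added check. */
static void install(struct slot *s, void **prealloc)
{
    if (s->pmd == NULL && *prealloc != NULL) {
        s->pmd = *prealloc;
        *prealloc = NULL;
    }
}

int main(void)
{
    struct slot s = { .pmd = NULL };  /* became empty after fault-around ran */
    void *prealloc = NULL;            /* ...so nothing was preallocated */

    install(&s, &prealloc);           /* pre-fix code would use the NULL table */
    assert(s.pmd == NULL);            /* fixed code simply leaves the slot alone */
    return 0;
}
```
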
diff --git a/queue-6.1/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch b/queue-6.1/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
new file mode 100644 (file)
index 0000000..5339d1e
--- /dev/null
@@ -0,0 +1,79 @@
+From d61d0ab573649789bf9eb909c89a1a193b2e3d10 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Wed, 29 Nov 2023 23:15:47 +0900
+Subject: nilfs2: fix missing error check for sb_set_blocksize call
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit d61d0ab573649789bf9eb909c89a1a193b2e3d10 upstream.
+
+When mounting a filesystem image with a block size larger than the page
+size, nilfs2 repeatedly outputs long error messages with stack traces to
+the kernel log, such as the following:
+
+ getblk(): invalid block size 8192 requested
+ logical block size: 512
+ ...
+ Call Trace:
+  dump_stack_lvl+0x92/0xd4
+  dump_stack+0xd/0x10
+  bdev_getblk+0x33a/0x354
+  __breadahead+0x11/0x80
+  nilfs_search_super_root+0xe2/0x704 [nilfs2]
+  load_nilfs+0x72/0x504 [nilfs2]
+  nilfs_mount+0x30f/0x518 [nilfs2]
+  legacy_get_tree+0x1b/0x40
+  vfs_get_tree+0x18/0xc4
+  path_mount+0x786/0xa88
+  __ia32_sys_mount+0x147/0x1a8
+  __do_fast_syscall_32+0x56/0xc8
+  do_fast_syscall_32+0x29/0x58
+  do_SYSENTER_32+0x15/0x18
+  entry_SYSENTER_32+0x98/0xf1
+ ...
+
+This overloads the system logger.  And to make matters worse, it sometimes
+crashes the kernel with a memory access violation.
+
+This is because the return value of the sb_set_blocksize() call, which
+should be checked for errors, is not checked.
+
+The latter issue is due to out-of-buffer memory being accessed based on a
+large block size that caused sb_set_blocksize() to fail for buffers read
+with the initial minimum block size that remained unupdated in the
+super_block structure.
+
+Since the nilfs2 mkfs tool does not accept block sizes larger than the system
+page size, this has been overlooked.  However, it is possible to create
+this situation by intentionally modifying the tool or by passing a
+filesystem image created on a system with a large page size to a system
+with a smaller page size and mounting it.
+
+Fix this issue by inserting the expected error handling for the call to
+sb_set_blocksize().
+
+Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/the_nilfs.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -716,7 +716,11 @@ int init_nilfs(struct the_nilfs *nilfs,
+                       goto failed_sbh;
+               }
+               nilfs_release_super_block(nilfs);
+-              sb_set_blocksize(sb, blocksize);
++              if (!sb_set_blocksize(sb, blocksize)) {
++                      nilfs_err(sb, "bad blocksize %d", blocksize);
++                      err = -EINVAL;
++                      goto out;
++              }
+               err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
+               if (err)
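
A compact model of the failure mode and the fixed pattern, using a hypothetical set_blocksize_model() helper that, like sb_set_blocksize(), reports failure by returning 0 and leaving the previously configured size untouched.

```c
#include <assert.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096

/* Returns the new size on success, 0 on failure (size not usable). */
static int set_blocksize_model(int *sb_blocksize, int requested)
{
    if (requested > MODEL_PAGE_SIZE)
        return 0;                /* failure: old size stays in effect */
    *sb_blocksize = requested;
    return requested;
}

int main(void)
{
    int sb_blocksize = 512;      /* initial minimum block size */
    int requested = 8192;        /* larger than the (model) page size */

    /* Fixed pattern: bail out instead of continuing with a stale size. */
    if (!set_blocksize_model(&sb_blocksize, requested)) {
        fprintf(stderr, "bad blocksize %d\n", requested);
        assert(sb_blocksize == 512);   /* buffers were read with this size */
        return 1;                      /* stands in for returning -EINVAL */
    }
    return 0;
}
```
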
diff --git a/queue-6.1/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch b/queue-6.1/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
new file mode 100644 (file)
index 0000000..95d748b
--- /dev/null
@@ -0,0 +1,109 @@
+From 675abf8df1353e0e3bde314993e0796c524cfbf0 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 5 Dec 2023 17:59:47 +0900
+Subject: nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage()
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 675abf8df1353e0e3bde314993e0796c524cfbf0 upstream.
+
+If nilfs2 reads a disk image with corrupted segment usage metadata, and
+its segment usage information is marked as an error for the segment at the
+write location, nilfs_sufile_set_segment_usage() can trigger WARN_ONs
+during log writing.
+
+Segments newly allocated for writing with nilfs_sufile_alloc() will not
+have this error flag set, but this unexpected situation will occur if the
+segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active
+segment) was marked in error.
+
+Fix this issue by inserting a sanity check to treat it as a file system
+corruption.
+
+Since error returns are not allowed during the execution phase where
+nilfs_sufile_set_segment_usage() is used, this inserts the sanity check
+into nilfs_sufile_mark_dirty() which pre-reads the buffer containing the
+segment usage record to be updated and sets it up in a dirty state for
+writing.
+
+In addition, nilfs_sufile_set_segment_usage() is also called when
+canceling log writing and undoing a segment usage update, so in order
+to avoid issuing the same kernel warning in the cancellation case,
+avoid checking the error flag in
+nilfs_sufile_set_segment_usage().
+
+Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/sufile.c |   42 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 35 insertions(+), 7 deletions(-)
+
+--- a/fs/nilfs2/sufile.c
++++ b/fs/nilfs2/sufile.c
+@@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode
+       down_write(&NILFS_MDT(sufile)->mi_sem);
+       ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
+-      if (!ret) {
+-              mark_buffer_dirty(bh);
+-              nilfs_mdt_mark_dirty(sufile);
+-              kaddr = kmap_atomic(bh->b_page);
+-              su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
++      if (ret)
++              goto out_sem;
++
++      kaddr = kmap_atomic(bh->b_page);
++      su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
++      if (unlikely(nilfs_segment_usage_error(su))) {
++              struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
++
++              kunmap_atomic(kaddr);
++              brelse(bh);
++              if (nilfs_segment_is_active(nilfs, segnum)) {
++                      nilfs_error(sufile->i_sb,
++                                  "active segment %llu is erroneous",
++                                  (unsigned long long)segnum);
++              } else {
++                      /*
++                       * Segments marked erroneous are never allocated by
++                       * nilfs_sufile_alloc(); only active segments, ie,
++                       * the segments indexed by ns_segnum or ns_nextnum,
++                       * can be erroneous here.
++                       */
++                      WARN_ON_ONCE(1);
++              }
++              ret = -EIO;
++      } else {
+               nilfs_segment_usage_set_dirty(su);
+               kunmap_atomic(kaddr);
++              mark_buffer_dirty(bh);
++              nilfs_mdt_mark_dirty(sufile);
+               brelse(bh);
+       }
++out_sem:
+       up_write(&NILFS_MDT(sufile)->mi_sem);
+       return ret;
+ }
+@@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struc
+       kaddr = kmap_atomic(bh->b_page);
+       su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+-      WARN_ON(nilfs_segment_usage_error(su));
+-      if (modtime)
++      if (modtime) {
++              /*
++               * Check segusage error and set su_lastmod only when updating
++               * this entry with a valid timestamp, not for cancellation.
++               */
++              WARN_ON_ONCE(nilfs_segment_usage_error(su));
+               su->su_lastmod = cpu_to_le64(modtime);
++      }
+       su->su_nblocks = cpu_to_le32(nblocks);
+       kunmap_atomic(kaddr);
diff --git a/queue-6.1/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch b/queue-6.1/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch
new file mode 100644 (file)
index 0000000..5dfcda8
--- /dev/null
@@ -0,0 +1,109 @@
+From db3fadacaf0c817b222090290d06ca2a338422d0 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 1 Dec 2023 14:10:21 +0100
+Subject: packet: Move reference count in packet_sock to atomic_long_t
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit db3fadacaf0c817b222090290d06ca2a338422d0 upstream.
+
+In some potential instances the reference count on struct packet_sock
+could be saturated and cause overflows which gets the kernel a bit
+confused. To prevent this, move to a 64-bit atomic reference count on
+64-bit architectures to prevent the possibility of this type to overflow.
+
+Because we can not handle saturation, using refcount_t is not possible
+in this place. Maybe someday in the future if it changes it could be
+used. Also, instead of using plain atomic64_t, use atomic_long_t instead.
+32-bit machines tend to be memory-limited (i.e. anything that increases
+a reference uses so much memory that you can't actually get to 2**32
+references). 32-bit architectures also tend to have serious problems
+with 64-bit atomics. Hence, atomic_long_t is the more natural solution.
+
+Reported-by: "The UK's National Cyber Security Centre (NCSC)" <security@ncsc.gov.uk>
+Co-developed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: stable@kernel.org
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20231201131021.19999-1-daniel@iogearbox.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   16 ++++++++--------
+ net/packet/internal.h  |    2 +-
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -4275,7 +4275,7 @@ static void packet_mm_open(struct vm_are
+       struct sock *sk = sock->sk;
+       if (sk)
+-              atomic_inc(&pkt_sk(sk)->mapped);
++              atomic_long_inc(&pkt_sk(sk)->mapped);
+ }
+ static void packet_mm_close(struct vm_area_struct *vma)
+@@ -4285,7 +4285,7 @@ static void packet_mm_close(struct vm_ar
+       struct sock *sk = sock->sk;
+       if (sk)
+-              atomic_dec(&pkt_sk(sk)->mapped);
++              atomic_long_dec(&pkt_sk(sk)->mapped);
+ }
+ static const struct vm_operations_struct packet_mmap_ops = {
+@@ -4380,7 +4380,7 @@ static int packet_set_ring(struct sock *
+       err = -EBUSY;
+       if (!closing) {
+-              if (atomic_read(&po->mapped))
++              if (atomic_long_read(&po->mapped))
+                       goto out;
+               if (packet_read_pending(rb))
+                       goto out;
+@@ -4483,7 +4483,7 @@ static int packet_set_ring(struct sock *
+       err = -EBUSY;
+       mutex_lock(&po->pg_vec_lock);
+-      if (closing || atomic_read(&po->mapped) == 0) {
++      if (closing || atomic_long_read(&po->mapped) == 0) {
+               err = 0;
+               spin_lock_bh(&rb_queue->lock);
+               swap(rb->pg_vec, pg_vec);
+@@ -4501,9 +4501,9 @@ static int packet_set_ring(struct sock *
+               po->prot_hook.func = (po->rx_ring.pg_vec) ?
+                                               tpacket_rcv : packet_rcv;
+               skb_queue_purge(rb_queue);
+-              if (atomic_read(&po->mapped))
+-                      pr_err("packet_mmap: vma is busy: %d\n",
+-                             atomic_read(&po->mapped));
++              if (atomic_long_read(&po->mapped))
++                      pr_err("packet_mmap: vma is busy: %ld\n",
++                             atomic_long_read(&po->mapped));
+       }
+       mutex_unlock(&po->pg_vec_lock);
+@@ -4581,7 +4581,7 @@ static int packet_mmap(struct file *file
+               }
+       }
+-      atomic_inc(&po->mapped);
++      atomic_long_inc(&po->mapped);
+       vma->vm_ops = &packet_mmap_ops;
+       err = 0;
+--- a/net/packet/internal.h
++++ b/net/packet/internal.h
+@@ -126,7 +126,7 @@ struct packet_sock {
+       __be16                  num;
+       struct packet_rollover  *rollover;
+       struct packet_mclist    *mclist;
+-      atomic_t                mapped;
++      atomic_long_t           mapped;
+       enum tpacket_versions   tp_version;
+       unsigned int            tp_hdrlen;
+       unsigned int            tp_reserve;
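
A userspace illustration of why the counter width matters; this is not kernel code, it only shows the wrap that an over-incremented 32-bit count takes, while a long-sized count on 64-bit machines keeps counting.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t mapped32 = UINT32_MAX;   /* a saturated 32-bit reference count */
    uint64_t mapped64 = UINT32_MAX;   /* the same value in a wider counter  */

    mapped32++;                       /* wraps to 0: a later "put" would free
                                         an object that still has users     */
    mapped64++;                       /* simply keeps counting               */

    printf("32-bit after ++: %u\n",  (unsigned)mapped32);
    printf("64-bit after ++: %llu\n", (unsigned long long)mapped64);

    assert(mapped32 == 0);
    assert(mapped64 == (uint64_t)UINT32_MAX + 1);
    return 0;
}
```
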
diff --git a/queue-6.1/platform-surface-aggregator-fix-recv_buf-return-value.patch b/queue-6.1/platform-surface-aggregator-fix-recv_buf-return-value.patch
new file mode 100644 (file)
index 0000000..04a1298
--- /dev/null
@@ -0,0 +1,49 @@
+From c8820c92caf0770bec976b01fa9e82bb993c5865 Mon Sep 17 00:00:00 2001
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+Date: Tue, 28 Nov 2023 20:49:35 +0100
+Subject: platform/surface: aggregator: fix recv_buf() return value
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+
+commit c8820c92caf0770bec976b01fa9e82bb993c5865 upstream.
+
+Serdev recv_buf() callback is supposed to return the amount of bytes
+consumed, therefore an int in between 0 and count.
+
+Do not return a negative number in case of an issue; when
+ssam_controller_receive_buf() returns ESHUTDOWN, just return 0, i.e. no
+bytes consumed. This keeps the exact same behavior as before.
+
+This fixes a potential WARN in serdev-ttyport.c:ttyport_receive_buf().
+
+Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem")
+Cc: stable@vger.kernel.org
+Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
+Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
+Link: https://lore.kernel.org/r/20231128194935.11350-1-francesco@dolcini.it
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/surface/aggregator/core.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/platform/surface/aggregator/core.c
++++ b/drivers/platform/surface/aggregator/core.c
+@@ -231,9 +231,12 @@ static int ssam_receive_buf(struct serde
+                           size_t n)
+ {
+       struct ssam_controller *ctrl;
++      int ret;
+       ctrl = serdev_device_get_drvdata(dev);
+-      return ssam_controller_receive_buf(ctrl, buf, n);
++      ret = ssam_controller_receive_buf(ctrl, buf, n);
++
++      return ret < 0 ? 0 : ret;
+ }
+ static void ssam_write_wakeup(struct serdev_device *dev)
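
A standalone sketch of the recv_buf() contract the fix restores, with a hypothetical controller_receive() standing in for ssam_controller_receive_buf(): errors are reported upward as "0 bytes consumed" rather than as a negative value, so the callback always returns something in [0, n].

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical lower layer: negative errno on failure, else bytes consumed. */
static int controller_receive(const unsigned char *buf, size_t n)
{
    (void)buf;
    return n ? -108 /* would be -ESHUTDOWN */ : 0;
}

/* Model of the patched callback: never report a negative count upward. */
static size_t receive_buf_model(const unsigned char *buf, size_t n)
{
    int ret = controller_receive(buf, n);

    return ret < 0 ? 0 : (size_t)ret;
}

int main(void)
{
    unsigned char data[4] = { 0 };
    size_t consumed = receive_buf_model(data, sizeof(data));

    assert(consumed <= sizeof(data));   /* the contract holds even on error */
    return 0;
}
```
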
diff --git a/queue-6.1/r8169-fix-rtl8125b-pause-frames-blasting-when-suspended.patch b/queue-6.1/r8169-fix-rtl8125b-pause-frames-blasting-when-suspended.patch
new file mode 100644 (file)
index 0000000..ac0a385
--- /dev/null
@@ -0,0 +1,69 @@
+From 4b0768b6556af56ee9b7cf4e68452a2b6289ae45 Mon Sep 17 00:00:00 2001
+From: ChunHao Lin <hau@realtek.com>
+Date: Wed, 29 Nov 2023 23:53:50 +0800
+Subject: r8169: fix rtl8125b PAUSE frames blasting when suspended
+
+From: ChunHao Lin <hau@realtek.com>
+
+commit 4b0768b6556af56ee9b7cf4e68452a2b6289ae45 upstream.
+
+When the FIFO reaches a near-full state, the device issues a pause frame.
+If the pause slot is enabled (set to 1), the device issues the pause
+frame only once. But if the pause slot is disabled (set to 0), the device
+keeps sending pause frames until the FIFO reaches a near-empty state.
+
+When the pause slot is disabled, if there is no one to handle receive
+packets, the device FIFO will reach a near-full state and keep sending
+pause frames. That will impact the entire local area network.
+
+This issue can be reproduced in Chromebox (not Chromebook) in
+developer mode running a test image (and v5.10 kernel):
+1) ping -f $CHROMEBOX (from workstation on same local network)
+2) run "powerd_dbus_suspend" from command line on the $CHROMEBOX
+3) ping $ROUTER (wait until ping fails from workstation)
+
+Takes about ~20-30 seconds after step 2 for the local network to
+stop working.
+
+Fix this issue by enabling pause slot to only send pause frame once
+when FIFO reaches near full state.
+
+Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125")
+Reported-by: Grant Grundler <grundler@chromium.org>
+Tested-by: Grant Grundler <grundler@chromium.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: ChunHao Lin <hau@realtek.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
+Link: https://lore.kernel.org/r/20231129155350.5843-1-hau@realtek.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -193,6 +193,7 @@ enum rtl_registers {
+                                       /* No threshold before first PCI xfer */
+ #define       RX_FIFO_THRESH                  (7 << RXCFG_FIFO_SHIFT)
+ #define       RX_EARLY_OFF                    (1 << 11)
++#define       RX_PAUSE_SLOT_ON                (1 << 11)       /* 8125b and later */
+ #define       RXCFG_DMA_SHIFT                 8
+                                       /* Unlimited maximum PCI burst. */
+ #define       RX_DMA_BURST                    (7 << RXCFG_DMA_SHIFT)
+@@ -2237,9 +2238,13 @@ static void rtl_init_rxcfg(struct rtl816
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+               break;
+-      case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63:
++      case RTL_GIGA_MAC_VER_61:
+               RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST);
+               break;
++      case RTL_GIGA_MAC_VER_63:
++              RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST |
++                      RX_PAUSE_SLOT_ON);
++              break;
+       default:
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
+               break;
diff --git a/queue-6.1/regmap-fix-bogus-error-on-regcache_sync-success.patch b/queue-6.1/regmap-fix-bogus-error-on-regcache_sync-success.patch
new file mode 100644 (file)
index 0000000..43e7ef8
--- /dev/null
@@ -0,0 +1,45 @@
+From fea88064445a59584460f7f67d102b6e5fc1ca1d Mon Sep 17 00:00:00 2001
+From: Matthias Reichl <hias@horus.com>
+Date: Sun, 3 Dec 2023 23:22:16 +0100
+Subject: regmap: fix bogus error on regcache_sync success
+
+From: Matthias Reichl <hias@horus.com>
+
+commit fea88064445a59584460f7f67d102b6e5fc1ca1d upstream.
+
+Since commit 0ec7731655de ("regmap: Ensure range selector registers
+are updated after cache sync") opening pcm512x based soundcards fail
+with EINVAL and dmesg shows sync cache and pm_runtime_get errors:
+
+[  228.794676] pcm512x 1-004c: Failed to sync cache: -22
+[  228.794740] pcm512x 1-004c: ASoC: error at snd_soc_pcm_component_pm_runtime_get on pcm512x.1-004c: -22
+
+This is caused by the cache check result leaking out into the
+regcache_sync return value.
+
+Fix this by making the check local-only, as the comment above the
+regcache_read call states a non-zero return value means there's
+nothing to do so the return value should not be altered.
+
+Fixes: 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync")
+Cc: stable@vger.kernel.org
+Signed-off-by: Matthias Reichl <hias@horus.com>
+Link: https://lore.kernel.org/r/20231203222216.96547-1-hias@horus.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/regmap/regcache.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/base/regmap/regcache.c
++++ b/drivers/base/regmap/regcache.c
+@@ -410,8 +410,7 @@ out:
+                       rb_entry(node, struct regmap_range_node, node);
+               /* If there's nothing in the cache there's nothing to sync */
+-              ret = regcache_read(map, this->selector_reg, &i);
+-              if (ret != 0)
++              if (regcache_read(map, this->selector_reg, &i) != 0)
+                       continue;
+               ret = _regmap_write(map, this->selector_reg, i);
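
A small model of the pattern being fixed, with hypothetical cache_read() and hw_write() helpers standing in for regcache_read() and _regmap_write(): the probe that decides whether a selector register needs syncing must stay local to the check rather than leak into the function's return value.

```c
#include <assert.h>

/* Stand-ins: nothing is cached, so the probe always reports "nothing to do". */
static int cache_read(int reg, int *val) { (void)reg; *val = 0; return -2; }
static int hw_write(int reg, int val)    { (void)reg; (void)val; return 0; }

/* Buggy shape: the probe result overwrites ret, so a sync that has nothing
 * to do still returns an error. */
static int sync_buggy(void)
{
    int ret = 0, val;

    for (int reg = 0; reg < 3; reg++) {
        ret = cache_read(reg, &val);
        if (ret != 0)
            continue;                /* skip, but ret now holds the error */
        ret = hw_write(reg, val);
    }
    return ret;
}

/* Fixed shape: the probe is evaluated inline and never touches ret. */
static int sync_fixed(void)
{
    int ret = 0, val;

    for (int reg = 0; reg < 3; reg++) {
        if (cache_read(reg, &val) != 0)
            continue;                /* nothing cached for this register */
        ret = hw_write(reg, val);
    }
    return ret;
}

int main(void)
{
    assert(sync_buggy() != 0);   /* bogus error although nothing went wrong */
    assert(sync_fixed() == 0);
    return 0;
}
```
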
diff --git a/queue-6.1/ring-buffer-test-last-update-in-32bit-version-of-__rb_time_read.patch b/queue-6.1/ring-buffer-test-last-update-in-32bit-version-of-__rb_time_read.patch
new file mode 100644 (file)
index 0000000..6fb64e4
--- /dev/null
@@ -0,0 +1,50 @@
+From f458a1453424e03462b5bb539673c9a3cddda480 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Wed, 6 Dec 2023 10:00:50 -0500
+Subject: ring-buffer: Test last update in 32bit version of __rb_time_read()
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit f458a1453424e03462b5bb539673c9a3cddda480 upstream.
+
+Since 64 bit cmpxchg() is very expensive on 32bit architectures, the
+timestamp used by the ring buffer does some interesting tricks to be able
+to still have an atomic 64 bit number. It originally just used 60 bits and
+broke it up into two 32 bit words where the extra 2 bits were used for
+synchronization. But this was not enough for all use cases, and all 64
+bits were required.
+
+The 32bit version of the ring buffer timestamp was then broken up into 3
+32bit words using the same counter trick. But one update was not done. The
+check to see if the read operation was done without interruption only
+checked the first two words and not last one (like it had before this
+update). Fix it by making sure all three updates happen without
+interruption by comparing the initial counter with the last updated
+counter.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231206100050.3100b7bb@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: f03f2abce4f39 ("ring-buffer: Have 32 bit time stamps use all 64 bits")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -646,8 +646,8 @@ static inline bool __rb_time_read(rb_tim
+       *cnt = rb_time_cnt(top);
+-      /* If top and bottom counts don't match, this interrupted a write */
+-      if (*cnt != rb_time_cnt(bottom))
++      /* If top and msb counts don't match, this interrupted a write */
++      if (*cnt != rb_time_cnt(msb))
+               return false;
+       /* The shift to msb will lose its cnt bits */
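
A userspace model of the three-word timestamp read; the field widths and layout are simplified and not the ring buffer's exact encoding. Each word carries the same small update counter, and the reader only accepts the value when all three counters match, which is the check the patch completes.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define CNT_BITS 2
#define CNT_MASK ((1u << CNT_BITS) - 1)

struct rb_word { uint32_t val; };   /* low CNT_BITS hold the update counter */

static uint32_t word_cnt(struct rb_word w)  { return w.val & CNT_MASK; }
static uint32_t word_data(struct rb_word w) { return w.val >> CNT_BITS; }

static bool rb_time_read_model(struct rb_word top, struct rb_word bottom,
                               struct rb_word msb, uint64_t *out)
{
    uint32_t cnt = word_cnt(top);

    /* Pre-fix code only compared top against bottom; comparing against
     * msb as well catches a write that raced with reading the third word. */
    if (cnt != word_cnt(bottom) || cnt != word_cnt(msb))
        return false;

    *out = ((uint64_t)word_data(msb) << 60) |
           ((uint64_t)word_data(top) << 30) |
           word_data(bottom);
    return true;
}

int main(void)
{
    struct rb_word top    = { (5u << CNT_BITS) | 1 };
    struct rb_word bottom = { (7u << CNT_BITS) | 1 };
    struct rb_word msb    = { (0u << CNT_BITS) | 1 };
    struct rb_word stale  = { (2u << CNT_BITS) | 0 };  /* from an older update */
    uint64_t v;

    assert(rb_time_read_model(top, bottom, msb, &v));    /* consistent read */
    assert(!rb_time_read_model(top, bottom, stale, &v)); /* torn read rejected */
    return 0;
}
```
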
diff --git a/queue-6.1/series b/queue-6.1/series
index 928754c7b096ef293a33b6ef3816a739b44e9b59..cf26fc5403a2771848afe60920c9553a9127d5c2 100644 (file)
@@ -108,3 +108,18 @@ alsa-pcm-fix-out-of-bounds-in-snd_pcm_state_names.patch
 alsa-hda-realtek-enable-headset-on-lenovo-m90-gen5.patch
 alsa-hda-realtek-add-new-framework-laptop-to-quirks.patch
 alsa-hda-realtek-add-framework-laptop-16-to-quirks.patch
+ring-buffer-test-last-update-in-32bit-version-of-__rb_time_read.patch
+nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
+nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
+cgroup_freezer-cgroup_freezing-check-if-not-frozen.patch
+checkstack-fix-printed-address.patch
+tracing-always-update-snapshot-buffer-size.patch
+tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch
+tracing-fix-incomplete-locking-when-disabling-buffered-events.patch
+tracing-fix-a-possible-race-when-disabling-buffered-events.patch
+packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch
+r8169-fix-rtl8125b-pause-frames-blasting-when-suspended.patch
+regmap-fix-bogus-error-on-regcache_sync-success.patch
+platform-surface-aggregator-fix-recv_buf-return-value.patch
+hugetlb-fix-null-ptr-deref-in-hugetlb_vma_lock_write.patch
+mm-fix-oops-when-filemap_map_pmd-without-prealloc_pte.patch
diff --git a/queue-6.1/tracing-always-update-snapshot-buffer-size.patch b/queue-6.1/tracing-always-update-snapshot-buffer-size.patch
new file mode 100644 (file)
index 0000000..c97c4c5
--- /dev/null
@@ -0,0 +1,83 @@
+From 7be76461f302ec05cbd62b90b2a05c64299ca01f Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 5 Dec 2023 16:52:09 -0500
+Subject: tracing: Always update snapshot buffer size
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 7be76461f302ec05cbd62b90b2a05c64299ca01f upstream.
+
+It used to be that only the top level instance had a snapshot buffer (for
+latency tracers like wakeup and irqsoff). The update of the ring buffer
+size would check if the instance was the top level and if so, it would
+also update the snapshot buffer as it needs to be the same as the main
+buffer.
+
+Now that lower level instances also have a snapshot buffer, they too need
+to update their snapshot buffer sizes when the main buffer is changed,
+otherwise the following can be triggered:
+
+ # cd /sys/kernel/tracing
+ # echo 1500 > buffer_size_kb
+ # mkdir instances/foo
+ # echo irqsoff > instances/foo/current_tracer
+ # echo 1000 > instances/foo/buffer_size_kb
+
+Produces:
+
+ WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320
+
+Which is:
+
+       ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
+
+       if (ret == -EBUSY) {
+               [..]
+       }
+
+       WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);  <== here
+
+That's because ring_buffer_swap_cpu() has:
+
+       int ret = -EINVAL;
+
+       [..]
+
+       /* At least make sure the two buffers are somewhat the same */
+       if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
+               goto out;
+
+       [..]
+ out:
+       return ret;
+ }
+
+Instead, update all instances' snapshot buffer sizes when their main
+buffer size is updated.
+
+Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -6306,8 +6306,7 @@ static int __tracing_resize_ring_buffer(
+               return ret;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+-      if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
+-          !tr->current_trace->use_max_tr)
++      if (!tr->current_trace->use_max_tr)
+               goto out;
+       ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
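
A toy model of why the sizes must stay in sync: the buffer swap that the latency tracers rely on refuses to run when the two buffers have a different number of pages, which is exactly the -EINVAL path that trips the warning quoted above.

```c
#include <assert.h>

struct buf { int nr_pages; };

/* Swapping is only allowed when both buffers have the same number of
 * pages, mirroring the early check in ring_buffer_swap_cpu(). */
static int swap_buffers(struct buf *a, struct buf *b)
{
    if (a->nr_pages != b->nr_pages)
        return -22;                      /* -EINVAL */
    struct buf tmp = *a; *a = *b; *b = tmp;
    return 0;
}

int main(void)
{
    struct buf main_buf = { .nr_pages = 375 };
    struct buf snapshot = { .nr_pages = 375 };

    /* Resize only the main buffer (the old behaviour for sub-instances)... */
    main_buf.nr_pages = 250;
    assert(swap_buffers(&main_buf, &snapshot) == -22);  /* WARN path upstream */

    /* ...whereas resizing the snapshot buffer too, as the fix does, keeps
     * the latency-tracer swap working. */
    snapshot.nr_pages = 250;
    assert(swap_buffers(&main_buf, &snapshot) == 0);
    return 0;
}
```
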
diff --git a/queue-6.1/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch b/queue-6.1/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch
new file mode 100644 (file)
index 0000000..31e1582
--- /dev/null
@@ -0,0 +1,203 @@
+From b538bf7d0ec11ca49f536dfda742a5f6db90a798 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 5 Dec 2023 16:52:11 -0500
+Subject: tracing: Disable snapshot buffer when stopping instance tracers
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit b538bf7d0ec11ca49f536dfda742a5f6db90a798 upstream.
+
+It used to be that only the top level instance had a snapshot buffer (for
+latency tracers like wakeup and irqsoff). Stopping a tracer in an
+instance would not disable the snapshot buffer. This could have some
+unintended consequences if the irqsoff tracer is enabled.
+
+Consolidate the tracing_start/stop() with tracing_start/stop_tr() so that
+all instances behave the same. The tracing_start/stop() functions will
+just call their respective tracing_start/stop_tr() with the global_array
+passed in.
+
+Link: https://lkml.kernel.org/r/20231205220011.041220035@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |  110 +++++++++++++++------------------------------------
+ 1 file changed, 34 insertions(+), 76 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2297,13 +2297,7 @@ int is_tracing_stopped(void)
+       return global_trace.stop_count;
+ }
+-/**
+- * tracing_start - quick start of the tracer
+- *
+- * If tracing is enabled but was stopped by tracing_stop,
+- * this will start the tracer back up.
+- */
+-void tracing_start(void)
++static void tracing_start_tr(struct trace_array *tr)
+ {
+       struct trace_buffer *buffer;
+       unsigned long flags;
+@@ -2311,119 +2305,83 @@ void tracing_start(void)
+       if (tracing_disabled)
+               return;
+-      raw_spin_lock_irqsave(&global_trace.start_lock, flags);
+-      if (--global_trace.stop_count) {
+-              if (global_trace.stop_count < 0) {
++      raw_spin_lock_irqsave(&tr->start_lock, flags);
++      if (--tr->stop_count) {
++              if (WARN_ON_ONCE(tr->stop_count < 0)) {
+                       /* Someone screwed up their debugging */
+-                      WARN_ON_ONCE(1);
+-                      global_trace.stop_count = 0;
++                      tr->stop_count = 0;
+               }
+               goto out;
+       }
+       /* Prevent the buffers from switching */
+-      arch_spin_lock(&global_trace.max_lock);
++      arch_spin_lock(&tr->max_lock);
+-      buffer = global_trace.array_buffer.buffer;
++      buffer = tr->array_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_enable(buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+-      buffer = global_trace.max_buffer.buffer;
++      buffer = tr->max_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_enable(buffer);
+ #endif
+-      arch_spin_unlock(&global_trace.max_lock);
+-
+- out:
+-      raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
+-}
+-
+-static void tracing_start_tr(struct trace_array *tr)
+-{
+-      struct trace_buffer *buffer;
+-      unsigned long flags;
+-
+-      if (tracing_disabled)
+-              return;
+-
+-      /* If global, we need to also start the max tracer */
+-      if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+-              return tracing_start();
+-
+-      raw_spin_lock_irqsave(&tr->start_lock, flags);
+-
+-      if (--tr->stop_count) {
+-              if (tr->stop_count < 0) {
+-                      /* Someone screwed up their debugging */
+-                      WARN_ON_ONCE(1);
+-                      tr->stop_count = 0;
+-              }
+-              goto out;
+-      }
+-
+-      buffer = tr->array_buffer.buffer;
+-      if (buffer)
+-              ring_buffer_record_enable(buffer);
++      arch_spin_unlock(&tr->max_lock);
+  out:
+       raw_spin_unlock_irqrestore(&tr->start_lock, flags);
+ }
+ /**
+- * tracing_stop - quick stop of the tracer
++ * tracing_start - quick start of the tracer
+  *
+- * Light weight way to stop tracing. Use in conjunction with
+- * tracing_start.
++ * If tracing is enabled but was stopped by tracing_stop,
++ * this will start the tracer back up.
+  */
+-void tracing_stop(void)
++void tracing_start(void)
++
++{
++      return tracing_start_tr(&global_trace);
++}
++
++static void tracing_stop_tr(struct trace_array *tr)
+ {
+       struct trace_buffer *buffer;
+       unsigned long flags;
+-      raw_spin_lock_irqsave(&global_trace.start_lock, flags);
+-      if (global_trace.stop_count++)
++      raw_spin_lock_irqsave(&tr->start_lock, flags);
++      if (tr->stop_count++)
+               goto out;
+       /* Prevent the buffers from switching */
+-      arch_spin_lock(&global_trace.max_lock);
++      arch_spin_lock(&tr->max_lock);
+-      buffer = global_trace.array_buffer.buffer;
++      buffer = tr->array_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_disable(buffer);
+ #ifdef CONFIG_TRACER_MAX_TRACE
+-      buffer = global_trace.max_buffer.buffer;
++      buffer = tr->max_buffer.buffer;
+       if (buffer)
+               ring_buffer_record_disable(buffer);
+ #endif
+-      arch_spin_unlock(&global_trace.max_lock);
++      arch_spin_unlock(&tr->max_lock);
+  out:
+-      raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
++      raw_spin_unlock_irqrestore(&tr->start_lock, flags);
+ }
+-static void tracing_stop_tr(struct trace_array *tr)
++/**
++ * tracing_stop - quick stop of the tracer
++ *
++ * Light weight way to stop tracing. Use in conjunction with
++ * tracing_start.
++ */
++void tracing_stop(void)
+ {
+-      struct trace_buffer *buffer;
+-      unsigned long flags;
+-
+-      /* If global, we need to also stop the max tracer */
+-      if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+-              return tracing_stop();
+-
+-      raw_spin_lock_irqsave(&tr->start_lock, flags);
+-      if (tr->stop_count++)
+-              goto out;
+-
+-      buffer = tr->array_buffer.buffer;
+-      if (buffer)
+-              ring_buffer_record_disable(buffer);
+-
+- out:
+-      raw_spin_unlock_irqrestore(&tr->start_lock, flags);
++      return tracing_stop_tr(&global_trace);
+ }
+ static int trace_save_cmdline(struct task_struct *tsk)
diff --git a/queue-6.1/tracing-fix-a-possible-race-when-disabling-buffered-events.patch b/queue-6.1/tracing-fix-a-possible-race-when-disabling-buffered-events.patch
new file mode 100644 (file)
index 0000000..bca3075
--- /dev/null
@@ -0,0 +1,82 @@
+From c0591b1cccf708a47bc465c62436d669a4213323 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 5 Dec 2023 17:17:36 +0100
+Subject: tracing: Fix a possible race when disabling buffered events
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit c0591b1cccf708a47bc465c62436d669a4213323 upstream.
+
+Function trace_buffered_event_disable() is responsible for freeing pages
+backing buffered events and this process can run concurrently with
+trace_event_buffer_lock_reserve().
+
+The following race is currently possible:
+
+* Function trace_buffered_event_disable() is called on CPU 0. It
+  increments trace_buffered_event_cnt on each CPU and waits via
+  synchronize_rcu() for each user of trace_buffered_event to complete.
+
+* After synchronize_rcu() is finished, function
+  trace_buffered_event_disable() has the exclusive access to
+  trace_buffered_event. All counters trace_buffered_event_cnt are at 1
+  and all pointers trace_buffered_event are still valid.
+
+* At this point, on a different CPU 1, the execution reaches
+  trace_event_buffer_lock_reserve(). The function calls
+  preempt_disable_notrace() and only now enters an RCU read-side
+  critical section. The function proceeds and reads a still valid
+  pointer from trace_buffered_event[CPU1] into the local variable
+  "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1]
+  which happens later.
+
+* Function trace_buffered_event_disable() continues. It frees
+  trace_buffered_event[CPU1] and decrements
+  trace_buffered_event_cnt[CPU1] back to 0.
+
+* Function trace_event_buffer_lock_reserve() continues. It reads and
+  increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it
+  believe that it can use the "entry" that it already obtained but the
+  pointer is now invalid and any access results in a use-after-free.
+
+Fix the problem by making a second synchronize_rcu() call after all
+trace_buffered_event values are set to NULL. This waits on all potential
+users in trace_event_buffer_lock_reserve() that still read a previous
+pointer from trace_buffered_event.
+
+Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/
+Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com
+
+Cc: stable@vger.kernel.org
+Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2728,13 +2728,17 @@ void trace_buffered_event_disable(void)
+               free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+               per_cpu(trace_buffered_event, cpu) = NULL;
+       }
++
+       /*
+-       * Make sure trace_buffered_event is NULL before clearing
+-       * trace_buffered_event_cnt.
++       * Wait for all CPUs that potentially started checking if they can use
++       * their event buffer only after the previous synchronize_rcu() call and
++       * they still read a valid pointer from trace_buffered_event. It must be
++       * ensured they don't see cleared trace_buffered_event_cnt else they
++       * could wrongly decide to use the pointed-to buffer which is now freed.
+        */
+-      smp_wmb();
++      synchronize_rcu();
+-      /* Do the work on each cpu */
++      /* For each CPU, relinquish the buffer */
+       on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
+                        true);
+ }
diff --git a/queue-6.1/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch b/queue-6.1/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch
new file mode 100644 (file)
index 0000000..bd206f5
--- /dev/null
@@ -0,0 +1,153 @@
+From 7fed14f7ac9cf5e38c693836fe4a874720141845 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 5 Dec 2023 17:17:34 +0100
+Subject: tracing: Fix incomplete locking when disabling buffered events
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit 7fed14f7ac9cf5e38c693836fe4a874720141845 upstream.
+
+The following warning appears when using buffered events:
+
+[  203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420
+[...]
+[  203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G            E      6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a
+[  203.670704] Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017
+[  203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420
+[  203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff
+[  203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202
+[  203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000
+[  203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400
+[  203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000
+[  203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+[  203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008
+[  203.781846] FS:  00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000
+[  203.781851] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0
+[  203.781862] Call Trace:
+[  203.781870]  <TASK>
+[  203.851949]  trace_event_buffer_commit+0x1ea/0x250
+[  203.851967]  trace_event_raw_event_sys_enter+0x83/0xe0
+[  203.851983]  syscall_trace_enter.isra.0+0x182/0x1a0
+[  203.851990]  do_syscall_64+0x3a/0xe0
+[  203.852075]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
+[  203.852090] RIP: 0033:0x7f4cd870fa77
+[  203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48
+[  203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089
+[  203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77
+[  203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0
+[  203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0
+[  203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40
+[  204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0
+[  204.049256]  </TASK>
+
+For instance, it can be triggered by running these two commands in
+parallel:
+
+ $ while true; do
+    echo hist:key=id.syscall:val=hitcount > \
+      /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger;
+  done
+ $ stress-ng --sysinfo $(nproc)
+
+The warning indicates that the current ring_buffer_per_cpu is not in the
+committing state. It happens because the active ring_buffer_event
+doesn't actually come from the ring_buffer_per_cpu but is allocated from
+trace_buffered_event.
+
+The bug is in function trace_buffered_event_disable() where the
+following normally happens:
+
+* The code invokes disable_trace_buffered_event() via
+  smp_call_function_many() and follows it by synchronize_rcu(). This
+  increments the per-CPU variable trace_buffered_event_cnt on each
+  target CPU and grants trace_buffered_event_disable() the exclusive
+  access to the per-CPU variable trace_buffered_event.
+
+* Maintenance is performed on trace_buffered_event, all per-CPU event
+  buffers get freed.
+
+* The code invokes enable_trace_buffered_event() via
+  smp_call_function_many(). This decrements trace_buffered_event_cnt and
+  releases the access to trace_buffered_event.
+
+A problem is that smp_call_function_many() runs a given function on all
+target CPUs except on the current one. The following can then occur:
+
+* Task X executing trace_buffered_event_disable() runs on CPU 0.
+
+* The control reaches synchronize_rcu() and the task gets rescheduled on
+  another CPU 1.
+
+* The RCU synchronization finishes. At this point,
+  trace_buffered_event_disable() has the exclusive access to all
+  trace_buffered_event variables except trace_buffered_event[CPU0]
+  because trace_buffered_event_cnt[CPU0] is never incremented and if the
+  buffer is currently unused, remains set to 0.
+
+* A different task Y is scheduled on CPU 0 and hits a trace event. The
+  code in trace_event_buffer_lock_reserve() sees that
+  trace_buffered_event_cnt[CPU0] is set to 0 and decides to use the
+  buffer provided by trace_buffered_event[CPU0].
+
+* Task X continues its execution in trace_buffered_event_disable(). The
+  code incorrectly frees the event buffer pointed by
+  trace_buffered_event[CPU0] and resets the variable to NULL.
+
+* Task Y writes event data to the now freed buffer and later detects the
+  created inconsistency.
+
+The issue is observable since commit dea499781a11 ("tracing: Fix warning
+in trace_buffered_event_disable()") which moved the call of
+trace_buffered_event_disable() in __ftrace_event_enable_disable()
+earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..).
+The underlying problem in trace_buffered_event_disable() is however
+present since the original implementation in commit 0fc1b09ff1ff
+("tracing: Use temp buffer when filtering events").
+
+Fix the problem by replacing the two smp_call_function_many() calls with
+on_each_cpu_mask() which invokes a given callback on all CPUs.
+
+Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/
+Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com
+
+Cc: stable@vger.kernel.org
+Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events")
+Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2717,11 +2717,9 @@ void trace_buffered_event_disable(void)
+       if (--trace_buffered_event_ref)
+               return;
+-      preempt_disable();
+       /* For each CPU, set the buffer as used. */
+-      smp_call_function_many(tracing_buffer_mask,
+-                             disable_trace_buffered_event, NULL, 1);
+-      preempt_enable();
++      on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
++                       NULL, true);
+       /* Wait for all current users to finish */
+       synchronize_rcu();
+@@ -2736,11 +2734,9 @@ void trace_buffered_event_disable(void)
+        */
+       smp_wmb();
+-      preempt_disable();
+       /* Do the work on each cpu */
+-      smp_call_function_many(tracing_buffer_mask,
+-                             enable_trace_buffered_event, NULL, 1);
+-      preempt_enable();
++      on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
++                       true);
+ }
+ static struct trace_buffer *temp_buffer;
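
A userspace model of the semantic difference the fix relies on, with plain loops standing in for the IPI machinery: smp_call_function_many() skips the CPU the caller happens to be running on, while on_each_cpu_mask() covers every CPU, so the calling CPU's buffered event is protected as well.

```c
#include <assert.h>

#define NR_CPUS 4

static int buffered_event_cnt[NR_CPUS];

static void disable_event(int cpu) { buffered_event_cnt[cpu]++; }

/* Model of smp_call_function_many(): every CPU except the current one. */
static void call_function_many_model(int current_cpu, void (*fn)(int))
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (cpu != current_cpu)
            fn(cpu);
}

/* Model of on_each_cpu_mask(): every CPU, including the current one. */
static void on_each_cpu_model(void (*fn)(int))
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        fn(cpu);
}

int main(void)
{
    int current_cpu = 0;

    call_function_many_model(current_cpu, disable_event);
    assert(buffered_event_cnt[0] == 0);   /* CPU 0 was never protected */

    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        buffered_event_cnt[cpu] = 0;

    on_each_cpu_model(disable_event);
    assert(buffered_event_cnt[0] == 1);   /* every CPU is now protected */
    return 0;
}
```
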