git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 9 Dec 2023 12:32:00 +0000 (13:32 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 9 Dec 2023 12:32:00 +0000 (13:32 +0100)
added patches:
nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
tracing-always-update-snapshot-buffer-size.patch
tracing-fix-a-possible-race-when-disabling-buffered-events.patch
tracing-fix-incomplete-locking-when-disabling-buffered-events.patch

queue-4.19/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch [new file with mode: 0644]
queue-4.19/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/tracing-always-update-snapshot-buffer-size.patch [new file with mode: 0644]
queue-4.19/tracing-fix-a-possible-race-when-disabling-buffered-events.patch [new file with mode: 0644]
queue-4.19/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch [new file with mode: 0644]

diff --git a/queue-4.19/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch b/queue-4.19/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
new file mode 100644 (file)
index 0000000..487a84d
--- /dev/null
@@ -0,0 +1,79 @@
+From d61d0ab573649789bf9eb909c89a1a193b2e3d10 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Wed, 29 Nov 2023 23:15:47 +0900
+Subject: nilfs2: fix missing error check for sb_set_blocksize call
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit d61d0ab573649789bf9eb909c89a1a193b2e3d10 upstream.
+
+When mounting a filesystem image with a block size larger than the page
+size, nilfs2 repeatedly outputs long error messages with stack traces to
+the kernel log, such as the following:
+
+ getblk(): invalid block size 8192 requested
+ logical block size: 512
+ ...
+ Call Trace:
+  dump_stack_lvl+0x92/0xd4
+  dump_stack+0xd/0x10
+  bdev_getblk+0x33a/0x354
+  __breadahead+0x11/0x80
+  nilfs_search_super_root+0xe2/0x704 [nilfs2]
+  load_nilfs+0x72/0x504 [nilfs2]
+  nilfs_mount+0x30f/0x518 [nilfs2]
+  legacy_get_tree+0x1b/0x40
+  vfs_get_tree+0x18/0xc4
+  path_mount+0x786/0xa88
+  __ia32_sys_mount+0x147/0x1a8
+  __do_fast_syscall_32+0x56/0xc8
+  do_fast_syscall_32+0x29/0x58
+  do_SYSENTER_32+0x15/0x18
+  entry_SYSENTER_32+0x98/0xf1
+ ...
+
+This overloads the system logger.  And to make matters worse, it sometimes
+crashes the kernel with a memory access violation.
+
+This is because the return value of the sb_set_blocksize() call, which
+should be checked for errors, is not checked.
+
+The latter issue occurs because buffers read with the initial minimum
+block size, which remains in the super_block structure when
+sb_set_blocksize() fails for the large block size, are then accessed
+based on that large block size, touching memory beyond the buffers.
+
+Since the nilfs2 mkfs tool does not accept block sizes larger than the system
+page size, this has been overlooked.  However, it is possible to create
+this situation by intentionally modifying the tool or by passing a
+filesystem image created on a system with a large page size to a system
+with a smaller page size and mounting it.
+
+Fix this issue by inserting the expected error handling for the call to
+sb_set_blocksize().
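+
+For reference, sb_set_blocksize() returns the new block size on success
+and 0 when the requested size cannot be used, so the check added below
+boils down to the following (simplified sketch of the same error handling):
+
+	if (!sb_set_blocksize(sb, blocksize)) {
+		/* e.g. a block size larger than the page size was rejected */
+		nilfs_error(sb, "bad blocksize %d", blocksize);
+		err = -EINVAL;
+		goto out;
+	}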
+
+Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/the_nilfs.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -688,7 +688,11 @@ int init_nilfs(struct the_nilfs *nilfs,
+                       goto failed_sbh;
+               }
+               nilfs_release_super_block(nilfs);
+-              sb_set_blocksize(sb, blocksize);
++              if (!sb_set_blocksize(sb, blocksize)) {
++                      nilfs_error(sb, "bad blocksize %d", blocksize);
++                      err = -EINVAL;
++                      goto out;
++              }
+               err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
+               if (err)
diff --git a/queue-4.19/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch b/queue-4.19/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
new file mode 100644 (file)
index 0000000..f7c57fd
--- /dev/null
@@ -0,0 +1,109 @@
+From 675abf8df1353e0e3bde314993e0796c524cfbf0 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 5 Dec 2023 17:59:47 +0900
+Subject: nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage()
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 675abf8df1353e0e3bde314993e0796c524cfbf0 upstream.
+
+If nilfs2 reads a disk image with corrupted segment usage metadata in
+which the segment usage information for the segment at the write location
+is marked as an error, nilfs_sufile_set_segment_usage() can trigger
+WARN_ONs during log writing.
+
+Segments newly allocated for writing with nilfs_sufile_alloc() will not
+have this error flag set, but this unexpected situation will occur if the
+segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active
+segment) was marked in error.
+
+Fix this issue by inserting a sanity check to treat it as a file system
+corruption.
+
+Since error returns are not allowed during the execution phase where
+nilfs_sufile_set_segment_usage() is used, the sanity check is instead
+inserted into nilfs_sufile_mark_dirty(), which pre-reads the buffer
+containing the segment usage record to be updated and sets it up in a
+dirty state for writing.
+
+In addition, nilfs_sufile_set_segment_usage() is also called when
+canceling log writing and undoing a segment usage update; to avoid
+issuing the same kernel warning in that case, skip the error flag check
+in nilfs_sufile_set_segment_usage() when the update is a cancellation.
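+
+In the hunk below, a cancellation corresponds to a zero modtime, so the
+check only applies when a real timestamp is being recorded (simplified
+sketch of the resulting logic):
+
+	if (modtime) {
+		/* regular update: the segment must not be marked erroneous */
+		WARN_ON_ONCE(nilfs_segment_usage_error(su));
+		su->su_lastmod = cpu_to_le64(modtime);
+	}
+	su->su_nblocks = cpu_to_le32(nblocks);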
+
+Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/sufile.c |   42 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 35 insertions(+), 7 deletions(-)
+
+--- a/fs/nilfs2/sufile.c
++++ b/fs/nilfs2/sufile.c
+@@ -504,15 +504,38 @@ int nilfs_sufile_mark_dirty(struct inode
+       down_write(&NILFS_MDT(sufile)->mi_sem);
+       ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
+-      if (!ret) {
+-              mark_buffer_dirty(bh);
+-              nilfs_mdt_mark_dirty(sufile);
+-              kaddr = kmap_atomic(bh->b_page);
+-              su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
++      if (ret)
++              goto out_sem;
++
++      kaddr = kmap_atomic(bh->b_page);
++      su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
++      if (unlikely(nilfs_segment_usage_error(su))) {
++              struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
++
++              kunmap_atomic(kaddr);
++              brelse(bh);
++              if (nilfs_segment_is_active(nilfs, segnum)) {
++                      nilfs_error(sufile->i_sb,
++                                  "active segment %llu is erroneous",
++                                  (unsigned long long)segnum);
++              } else {
++                      /*
++                       * Segments marked erroneous are never allocated by
++                       * nilfs_sufile_alloc(); only active segments, ie,
++                       * the segments indexed by ns_segnum or ns_nextnum,
++                       * can be erroneous here.
++                       */
++                      WARN_ON_ONCE(1);
++              }
++              ret = -EIO;
++      } else {
+               nilfs_segment_usage_set_dirty(su);
+               kunmap_atomic(kaddr);
++              mark_buffer_dirty(bh);
++              nilfs_mdt_mark_dirty(sufile);
+               brelse(bh);
+       }
++out_sem:
+       up_write(&NILFS_MDT(sufile)->mi_sem);
+       return ret;
+ }
+@@ -539,9 +562,14 @@ int nilfs_sufile_set_segment_usage(struc
+       kaddr = kmap_atomic(bh->b_page);
+       su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+-      WARN_ON(nilfs_segment_usage_error(su));
+-      if (modtime)
++      if (modtime) {
++              /*
++               * Check segusage error and set su_lastmod only when updating
++               * this entry with a valid timestamp, not for cancellation.
++               */
++              WARN_ON_ONCE(nilfs_segment_usage_error(su));
+               su->su_lastmod = cpu_to_le64(modtime);
++      }
+       su->su_nblocks = cpu_to_le32(nblocks);
+       kunmap_atomic(kaddr);
diff --git a/queue-4.19/series b/queue-4.19/series
index 82d6eb3aed8a5b9d574fabe49ebbf416ab28a5a6..e6aee395d4bc02dc6d24a78e0a20f06bd634fd83 100644 (file)
@@ -29,3 +29,8 @@ arm-imx-check-return-value-of-devm_kasprintf-in-imx_.patch
 arm-dts-imx-make-gpt-node-name-generic.patch
 arm-dts-imx7-declare-timers-compatible-with-fsl-imx6.patch
 alsa-pcm-fix-out-of-bounds-in-snd_pcm_state_names.patch
+nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch
+nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch
+tracing-always-update-snapshot-buffer-size.patch
+tracing-fix-incomplete-locking-when-disabling-buffered-events.patch
+tracing-fix-a-possible-race-when-disabling-buffered-events.patch
diff --git a/queue-4.19/tracing-always-update-snapshot-buffer-size.patch b/queue-4.19/tracing-always-update-snapshot-buffer-size.patch
new file mode 100644 (file)
index 0000000..de78f0d
--- /dev/null
@@ -0,0 +1,83 @@
+From 7be76461f302ec05cbd62b90b2a05c64299ca01f Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 5 Dec 2023 16:52:09 -0500
+Subject: tracing: Always update snapshot buffer size
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 7be76461f302ec05cbd62b90b2a05c64299ca01f upstream.
+
+It used to be that only the top level instance had a snapshot buffer (for
+latency tracers like wakeup and irqsoff). The update of the ring buffer
+size would check if the instance was the top level and if so, it would
+also update the snapshot buffer as it needs to be the same as the main
+buffer.
+
+Now that lower level instances also have a snapshot buffer, they too need
+to update their snapshot buffer sizes when the main buffer is changed,
+otherwise the following can be triggered:
+
+ # cd /sys/kernel/tracing
+ # echo 1500 > buffer_size_kb
+ # mkdir instances/foo
+ # echo irqsoff > instances/foo/current_tracer
+ # echo 1000 > instances/foo/buffer_size_kb
+
+Produces:
+
+ WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320
+
+Which is:
+
+       ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
+
+       if (ret == -EBUSY) {
+               [..]
+       }
+
+       WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);  <== here
+
+That's because ring_buffer_swap_cpu() has:
+
+       int ret = -EINVAL;
+
+       [..]
+
+       /* At least make sure the two buffers are somewhat the same */
+       if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
+               goto out;
+
+       [..]
+ out:
+       return ret;
+ }
+
+Instead, update all instances' snapshot buffer sizes when their main
+buffer size is updated.
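+
+Concretely, this means dropping the top-level-only condition in
+__tracing_resize_ring_buffer(), roughly (sketch of the hunk below):
+
+	/* Before: only the global instance resized its snapshot buffer. */
+	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
+	    !tr->current_trace->use_max_tr)
+		goto out;
+
+	/* After: every instance whose tracer uses max_tr resizes it too. */
+	if (!tr->current_trace->use_max_tr)
+		goto out;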
+
+Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5258,8 +5258,7 @@ static int __tracing_resize_ring_buffer(
+               return ret;
+ #ifdef CONFIG_TRACER_MAX_TRACE
+-      if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
+-          !tr->current_trace->use_max_tr)
++      if (!tr->current_trace->use_max_tr)
+               goto out;
+       ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
diff --git a/queue-4.19/tracing-fix-a-possible-race-when-disabling-buffered-events.patch b/queue-4.19/tracing-fix-a-possible-race-when-disabling-buffered-events.patch
new file mode 100644 (file)
index 0000000..72d9305
--- /dev/null
@@ -0,0 +1,82 @@
+From c0591b1cccf708a47bc465c62436d669a4213323 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 5 Dec 2023 17:17:36 +0100
+Subject: tracing: Fix a possible race when disabling buffered events
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit c0591b1cccf708a47bc465c62436d669a4213323 upstream.
+
+Function trace_buffered_event_disable() is responsible for freeing pages
+backing buffered events and this process can run concurrently with
+trace_event_buffer_lock_reserve().
+
+The following race is currently possible:
+
+* Function trace_buffered_event_disable() is called on CPU 0. It
+  increments trace_buffered_event_cnt on each CPU and waits via
+  synchronize_rcu() for each user of trace_buffered_event to complete.
+
+* After synchronize_rcu() is finished, function
+  trace_buffered_event_disable() has exclusive access to
+  trace_buffered_event. All counters trace_buffered_event_cnt are at 1
+  and all pointers trace_buffered_event are still valid.
+
+* At this point, on a different CPU 1, the execution reaches
+  trace_event_buffer_lock_reserve(). The function calls
+  preempt_disable_notrace() and only now enters an RCU read-side
+  critical section. The function proceeds and reads a still valid
+  pointer from trace_buffered_event[CPU1] into the local variable
+  "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1]
+  which happens later.
+
+* Function trace_buffered_event_disable() continues. It frees
+  trace_buffered_event[CPU1] and decrements
+  trace_buffered_event_cnt[CPU1] back to 0.
+
+* Function trace_event_buffer_lock_reserve() continues. It reads and
+  increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it
+  believe that it can use the "entry" that it already obtained but the
+  pointer is now invalid and any access results in a use-after-free.
+
+Fix the problem by making a second synchronize_rcu() call after all
+trace_buffered_event values are set to NULL. This waits on all potential
+users in trace_event_buffer_lock_reserve() that still read a previous
+pointer from trace_buffered_event.
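+
+With this change (combined with the locking fix from the same series), the
+teardown in trace_buffered_event_disable() roughly follows this order
+(simplified sketch, not the verbatim 4.19 code):
+
+	/* For each CPU, mark the buffer as used (counter incremented). */
+	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
+			 NULL, true);
+
+	/* Wait for all current users of the buffers to finish. */
+	synchronize_sched();
+
+	for_each_tracing_cpu(cpu) {
+		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+		per_cpu(trace_buffered_event, cpu) = NULL;
+	}
+
+	/*
+	 * Wait for readers that may still hold the old pointer before the
+	 * counters are allowed to drop back to 0.
+	 */
+	synchronize_rcu();
+
+	/* For each CPU, relinquish the buffer (counter back to 0). */
+	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event,
+			 NULL, true);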
+
+Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/
+Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com
+
+Cc: stable@vger.kernel.org
+Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2272,13 +2272,17 @@ void trace_buffered_event_disable(void)
+               free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+               per_cpu(trace_buffered_event, cpu) = NULL;
+       }
++
+       /*
+-       * Make sure trace_buffered_event is NULL before clearing
+-       * trace_buffered_event_cnt.
++       * Wait for all CPUs that potentially started checking if they can use
++       * their event buffer only after the previous synchronize_rcu() call and
++       * they still read a valid pointer from trace_buffered_event. It must be
++       * ensured they don't see cleared trace_buffered_event_cnt else they
++       * could wrongly decide to use the pointed-to buffer which is now freed.
+        */
+-      smp_wmb();
++      synchronize_rcu();
+-      /* Do the work on each cpu */
++      /* For each CPU, relinquish the buffer */
+       on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
+                        true);
+ }
diff --git a/queue-4.19/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch b/queue-4.19/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch
new file mode 100644 (file)
index 0000000..07d1882
--- /dev/null
@@ -0,0 +1,153 @@
+From 7fed14f7ac9cf5e38c693836fe4a874720141845 Mon Sep 17 00:00:00 2001
+From: Petr Pavlu <petr.pavlu@suse.com>
+Date: Tue, 5 Dec 2023 17:17:34 +0100
+Subject: tracing: Fix incomplete locking when disabling buffered events
+
+From: Petr Pavlu <petr.pavlu@suse.com>
+
+commit 7fed14f7ac9cf5e38c693836fe4a874720141845 upstream.
+
+The following warning appears when using buffered events:
+
+[  203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420
+[...]
+[  203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G            E      6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a
+[  203.670704] Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017
+[  203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420
+[  203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff
+[  203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202
+[  203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000
+[  203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400
+[  203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000
+[  203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+[  203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008
+[  203.781846] FS:  00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000
+[  203.781851] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0
+[  203.781862] Call Trace:
+[  203.781870]  <TASK>
+[  203.851949]  trace_event_buffer_commit+0x1ea/0x250
+[  203.851967]  trace_event_raw_event_sys_enter+0x83/0xe0
+[  203.851983]  syscall_trace_enter.isra.0+0x182/0x1a0
+[  203.851990]  do_syscall_64+0x3a/0xe0
+[  203.852075]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
+[  203.852090] RIP: 0033:0x7f4cd870fa77
+[  203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48
+[  203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089
+[  203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77
+[  203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0
+[  203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0
+[  203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40
+[  204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0
+[  204.049256]  </TASK>
+
+For instance, it can be triggered by running these two commands in
+parallel:
+
+ $ while true; do
+    echo hist:key=id.syscall:val=hitcount > \
+      /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger;
+  done
+ $ stress-ng --sysinfo $(nproc)
+
+The warning indicates that the current ring_buffer_per_cpu is not in the
+committing state. It happens because the active ring_buffer_event
+doesn't actually come from the ring_buffer_per_cpu but is allocated from
+trace_buffered_event.
+
+The bug is in function trace_buffered_event_disable() where the
+following normally happens:
+
+* The code invokes disable_trace_buffered_event() via
+  smp_call_function_many() and follows it by synchronize_rcu(). This
+  increments the per-CPU variable trace_buffered_event_cnt on each
+  target CPU and grants trace_buffered_event_disable() exclusive
+  access to the per-CPU variable trace_buffered_event.
+
+* Maintenance is performed on trace_buffered_event, all per-CPU event
+  buffers get freed.
+
+* The code invokes enable_trace_buffered_event() via
+  smp_call_function_many(). This decrements trace_buffered_event_cnt and
+  releases the access to trace_buffered_event.
+
+A problem is that smp_call_function_many() runs a given function on all
+  target CPUs except the current one. The following can then occur:
+
+* Task X executing trace_buffered_event_disable() runs on CPU 0.
+
+* The control reaches synchronize_rcu() and the task gets rescheduled on
+  another CPU 1.
+
+* The RCU synchronization finishes. At this point,
+  trace_buffered_event_disable() has exclusive access to all
+  trace_buffered_event variables except trace_buffered_event[CPU0],
+  because trace_buffered_event_cnt[CPU0] was never incremented and, if
+  the buffer is currently unused, remains set to 0.
+
+* A different task Y is scheduled on CPU 0 and hits a trace event. The
+  code in trace_event_buffer_lock_reserve() sees that
+  trace_buffered_event_cnt[CPU0] is set to 0 and decides to use the
+  buffer provided by trace_buffered_event[CPU0].
+
+* Task X continues its execution in trace_buffered_event_disable(). The
+  code incorrectly frees the event buffer pointed to by
+  trace_buffered_event[CPU0] and resets the variable to NULL.
+
+* Task Y writes event data to the now freed buffer and later detects the
+  resulting inconsistency.
+
+The issue is observable since commit dea499781a11 ("tracing: Fix warning
+in trace_buffered_event_disable()") which moved the call of
+trace_buffered_event_disable() in __ftrace_event_enable_disable()
+earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..).
+The underlying problem in trace_buffered_event_disable() is however
+present since the original implementation in commit 0fc1b09ff1ff
+("tracing: Use temp buffer when filtering events").
+
+Fix the problem by replacing the two smp_call_function_many() calls with
+on_each_cpu_mask(), which invokes a given callback on all CPUs in the
+mask, including the current one.
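+
+For the disable step, the change in the hunk below amounts to the
+following (simplified sketch; the enable step is converted the same way):
+
+	/* Before: smp_call_function_many() skips the calling CPU. */
+	preempt_disable();
+	smp_call_function_many(tracing_buffer_mask,
+			       disable_trace_buffered_event, NULL, 1);
+	preempt_enable();
+
+	/* After: every CPU in tracing_buffer_mask runs the callback. */
+	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
+			 NULL, true);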
+
+Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/
+Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com
+
+Cc: stable@vger.kernel.org
+Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events")
+Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()")
+Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2261,11 +2261,9 @@ void trace_buffered_event_disable(void)
+       if (--trace_buffered_event_ref)
+               return;
+-      preempt_disable();
+       /* For each CPU, set the buffer as used. */
+-      smp_call_function_many(tracing_buffer_mask,
+-                             disable_trace_buffered_event, NULL, 1);
+-      preempt_enable();
++      on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
++                       NULL, true);
+       /* Wait for all current users to finish */
+       synchronize_sched();
+@@ -2280,11 +2278,9 @@ void trace_buffered_event_disable(void)
+        */
+       smp_wmb();
+-      preempt_disable();
+       /* Do the work on each cpu */
+-      smp_call_function_many(tracing_buffer_mask,
+-                             enable_trace_buffered_event, NULL, 1);
+-      preempt_enable();
++      on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
++                       true);
+ }
+ static struct ring_buffer *temp_buffer;