From b2fb6d5fabc0e0696c4ca5241384e0fb230ff896 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 9 Dec 2023 13:38:32 +0100 Subject: [PATCH] 5.15-stable patches added patches: checkstack-fix-printed-address.patch nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch platform-surface-aggregator-fix-recv_buf-return-value.patch regmap-fix-bogus-error-on-regcache_sync-success.patch tracing-always-update-snapshot-buffer-size.patch tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch tracing-fix-a-possible-race-when-disabling-buffered-events.patch tracing-fix-incomplete-locking-when-disabling-buffered-events.patch --- .../checkstack-fix-printed-address.patch | 60 ++++++ ...rror-check-for-sb_set_blocksize-call.patch | 79 +++++++ ...ng-in-nilfs_sufile_set_segment_usage.patch | 109 ++++++++++ ...ount-in-packet_sock-to-atomic_long_t.patch | 109 ++++++++++ ...aggregator-fix-recv_buf-return-value.patch | 49 +++++ ...bogus-error-on-regcache_sync-success.patch | 45 ++++ queue-5.15/series | 10 + ...g-always-update-snapshot-buffer-size.patch | 83 +++++++ ...uffer-when-stopping-instance-tracers.patch | 203 ++++++++++++++++++ ...-race-when-disabling-buffered-events.patch | 82 +++++++ ...cking-when-disabling-buffered-events.patch | 153 +++++++++++++ 11 files changed, 982 insertions(+) create mode 100644 queue-5.15/checkstack-fix-printed-address.patch create mode 100644 queue-5.15/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch create mode 100644 queue-5.15/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch create mode 100644 queue-5.15/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch create mode 100644 queue-5.15/platform-surface-aggregator-fix-recv_buf-return-value.patch create mode 100644 queue-5.15/regmap-fix-bogus-error-on-regcache_sync-success.patch create mode 100644 queue-5.15/tracing-always-update-snapshot-buffer-size.patch create mode 100644 queue-5.15/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch create mode 100644 queue-5.15/tracing-fix-a-possible-race-when-disabling-buffered-events.patch create mode 100644 queue-5.15/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch diff --git a/queue-5.15/checkstack-fix-printed-address.patch b/queue-5.15/checkstack-fix-printed-address.patch new file mode 100644 index 00000000000..806647ae72e --- /dev/null +++ b/queue-5.15/checkstack-fix-printed-address.patch @@ -0,0 +1,60 @@ +From ee34db3f271cea4d4252048617919c2caafe698b Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Mon, 20 Nov 2023 19:37:17 +0100 +Subject: checkstack: fix printed address + +From: Heiko Carstens + +commit ee34db3f271cea4d4252048617919c2caafe698b upstream. + +All addresses printed by checkstack have an extra incorrect 0 appended at +the end. + +This was introduced with commit 677f1410e058 ("scripts/checkstack.pl: don't +display $dre as different entity"): since then the address is taken from +the line which contains the function name, instead of the line which +contains stack consumption. E.g. on s390: + +0000000000100a30 : +... + 100a44: e3 f0 ff 70 ff 71 lay %r15,-144(%r15) + +So the used regex which matches spaces and hexadecimal numbers to extract +an address now matches a different substring. Subsequently replacing spaces +with 0 appends a zero at the and, instead of replacing leading spaces. 
+ +Fix this by using the proper regex, and simplify the code a bit. + +Link: https://lkml.kernel.org/r/20231120183719.2188479-2-hca@linux.ibm.com +Fixes: 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity") +Signed-off-by: Heiko Carstens +Cc: Maninder Singh +Cc: Masahiro Yamada +Cc: Vaneet Narang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + scripts/checkstack.pl | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/scripts/checkstack.pl ++++ b/scripts/checkstack.pl +@@ -142,15 +142,11 @@ $total_size = 0; + while (my $line = ) { + if ($line =~ m/$funcre/) { + $func = $1; +- next if $line !~ m/^($xs*)/; ++ next if $line !~ m/^($x*)/; + if ($total_size > $min_stack) { + push @stack, "$intro$total_size\n"; + } +- +- $addr = $1; +- $addr =~ s/ /0/g; +- $addr = "0x$addr"; +- ++ $addr = "0x$1"; + $intro = "$addr $func [$file]:"; + my $padlen = 56 - length($intro); + while ($padlen > 0) { diff --git a/queue-5.15/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch b/queue-5.15/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch new file mode 100644 index 00000000000..f7469c149d2 --- /dev/null +++ b/queue-5.15/nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch @@ -0,0 +1,79 @@ +From d61d0ab573649789bf9eb909c89a1a193b2e3d10 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Wed, 29 Nov 2023 23:15:47 +0900 +Subject: nilfs2: fix missing error check for sb_set_blocksize call + +From: Ryusuke Konishi + +commit d61d0ab573649789bf9eb909c89a1a193b2e3d10 upstream. + +When mounting a filesystem image with a block size larger than the page +size, nilfs2 repeatedly outputs long error messages with stack traces to +the kernel log, such as the following: + + getblk(): invalid block size 8192 requested + logical block size: 512 + ... + Call Trace: + dump_stack_lvl+0x92/0xd4 + dump_stack+0xd/0x10 + bdev_getblk+0x33a/0x354 + __breadahead+0x11/0x80 + nilfs_search_super_root+0xe2/0x704 [nilfs2] + load_nilfs+0x72/0x504 [nilfs2] + nilfs_mount+0x30f/0x518 [nilfs2] + legacy_get_tree+0x1b/0x40 + vfs_get_tree+0x18/0xc4 + path_mount+0x786/0xa88 + __ia32_sys_mount+0x147/0x1a8 + __do_fast_syscall_32+0x56/0xc8 + do_fast_syscall_32+0x29/0x58 + do_SYSENTER_32+0x15/0x18 + entry_SYSENTER_32+0x98/0xf1 + ... + +This overloads the system logger. And to make matters worse, it sometimes +crashes the kernel with a memory access violation. + +This is because the return value of the sb_set_blocksize() call, which +should be checked for errors, is not checked. + +The latter issue is due to out-of-buffer memory being accessed based on a +large block size that caused sb_set_blocksize() to fail for buffers read +with the initial minimum block size that remained unupdated in the +super_block structure. + +Since nilfs2 mkfs tool does not accept block sizes larger than the system +page size, this has been overlooked. However, it is possible to create +this situation by intentionally modifying the tool or by passing a +filesystem image created on a system with a large page size to a system +with a smaller page size and mounting it. + +Fix this issue by inserting the expected error handling for the call to +sb_set_blocksize(). 
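A minimal sketch of the pattern the fix applies (not the nilfs2 code itself; the
helper name is invented): sb_set_blocksize() returns the new block size on
success and 0 when the size cannot be used, so the caller has to bail out
before reading any buffers with the new size.

    #include <linux/fs.h>       /* sb_set_blocksize(), struct super_block */

    /*
     * Hedged example: reject a block size the page cache cannot back
     * instead of continuing with a stale sb->s_blocksize.
     */
    static int example_apply_blocksize(struct super_block *sb, int blocksize)
    {
            if (!sb_set_blocksize(sb, blocksize))
                    return -EINVAL;         /* e.g. blocksize > PAGE_SIZE */
            return 0;
    }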
+ +Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/the_nilfs.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -717,7 +717,11 @@ int init_nilfs(struct the_nilfs *nilfs, + goto failed_sbh; + } + nilfs_release_super_block(nilfs); +- sb_set_blocksize(sb, blocksize); ++ if (!sb_set_blocksize(sb, blocksize)) { ++ nilfs_err(sb, "bad blocksize %d", blocksize); ++ err = -EINVAL; ++ goto out; ++ } + + err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); + if (err) diff --git a/queue-5.15/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch b/queue-5.15/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch new file mode 100644 index 00000000000..95d748bda8d --- /dev/null +++ b/queue-5.15/nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch @@ -0,0 +1,109 @@ +From 675abf8df1353e0e3bde314993e0796c524cfbf0 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Tue, 5 Dec 2023 17:59:47 +0900 +Subject: nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage() + +From: Ryusuke Konishi + +commit 675abf8df1353e0e3bde314993e0796c524cfbf0 upstream. + +If nilfs2 reads a disk image with corrupted segment usage metadata, and +its segment usage information is marked as an error for the segment at the +write location, nilfs_sufile_set_segment_usage() can trigger WARN_ONs +during log writing. + +Segments newly allocated for writing with nilfs_sufile_alloc() will not +have this error flag set, but this unexpected situation will occur if the +segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active +segment) was marked in error. + +Fix this issue by inserting a sanity check to treat it as a file system +corruption. + +Since error returns are not allowed during the execution phase where +nilfs_sufile_set_segment_usage() is used, this inserts the sanity check +into nilfs_sufile_mark_dirty() which pre-reads the buffer containing the +segment usage record to be updated and sets it up in a dirty state for +writing. + +In addition, nilfs_sufile_set_segment_usage() is also called when +canceling log writing and undoing segment usage update, so in order to +avoid issuing the same kernel warning in that case, in case of +cancellation, avoid checking the error flag in +nilfs_sufile_set_segment_usage(). 
+ +Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094 +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/sufile.c | 42 +++++++++++++++++++++++++++++++++++------- + 1 file changed, 35 insertions(+), 7 deletions(-) + +--- a/fs/nilfs2/sufile.c ++++ b/fs/nilfs2/sufile.c +@@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode + + down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); +- if (!ret) { +- mark_buffer_dirty(bh); +- nilfs_mdt_mark_dirty(sufile); +- kaddr = kmap_atomic(bh->b_page); +- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); ++ if (ret) ++ goto out_sem; ++ ++ kaddr = kmap_atomic(bh->b_page); ++ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); ++ if (unlikely(nilfs_segment_usage_error(su))) { ++ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; ++ ++ kunmap_atomic(kaddr); ++ brelse(bh); ++ if (nilfs_segment_is_active(nilfs, segnum)) { ++ nilfs_error(sufile->i_sb, ++ "active segment %llu is erroneous", ++ (unsigned long long)segnum); ++ } else { ++ /* ++ * Segments marked erroneous are never allocated by ++ * nilfs_sufile_alloc(); only active segments, ie, ++ * the segments indexed by ns_segnum or ns_nextnum, ++ * can be erroneous here. ++ */ ++ WARN_ON_ONCE(1); ++ } ++ ret = -EIO; ++ } else { + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); ++ mark_buffer_dirty(bh); ++ nilfs_mdt_mark_dirty(sufile); + brelse(bh); + } ++out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; + } +@@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struc + + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); +- WARN_ON(nilfs_segment_usage_error(su)); +- if (modtime) ++ if (modtime) { ++ /* ++ * Check segusage error and set su_lastmod only when updating ++ * this entry with a valid timestamp, not for cancellation. ++ */ ++ WARN_ON_ONCE(nilfs_segment_usage_error(su)); + su->su_lastmod = cpu_to_le64(modtime); ++ } + su->su_nblocks = cpu_to_le32(nblocks); + kunmap_atomic(kaddr); + diff --git a/queue-5.15/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch b/queue-5.15/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch new file mode 100644 index 00000000000..8f18c45a0b6 --- /dev/null +++ b/queue-5.15/packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch @@ -0,0 +1,109 @@ +From db3fadacaf0c817b222090290d06ca2a338422d0 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Fri, 1 Dec 2023 14:10:21 +0100 +Subject: packet: Move reference count in packet_sock to atomic_long_t + +From: Daniel Borkmann + +commit db3fadacaf0c817b222090290d06ca2a338422d0 upstream. + +In some potential instances the reference count on struct packet_sock +could be saturated and cause overflows which gets the kernel a bit +confused. To prevent this, move to a 64-bit atomic reference count on +64-bit architectures to prevent the possibility of this type to overflow. + +Because we can not handle saturation, using refcount_t is not possible +in this place. Maybe someday in the future if it changes it could be +used. Also, instead of using plain atomic64_t, use atomic_long_t instead. +32-bit machines tend to be memory-limited (i.e. 
anything that increases +a reference uses so much memory that you can't actually get to 2**32 +references). 32-bit architectures also tend to have serious problems +with 64-bit atomics. Hence, atomic_long_t is the more natural solution. + +Reported-by: "The UK's National Cyber Security Centre (NCSC)" +Co-developed-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Daniel Borkmann +Cc: Linus Torvalds +Cc: stable@kernel.org +Reviewed-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20231201131021.19999-1-daniel@iogearbox.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 16 ++++++++-------- + net/packet/internal.h | 2 +- + 2 files changed, 9 insertions(+), 9 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4244,7 +4244,7 @@ static void packet_mm_open(struct vm_are + struct sock *sk = sock->sk; + + if (sk) +- atomic_inc(&pkt_sk(sk)->mapped); ++ atomic_long_inc(&pkt_sk(sk)->mapped); + } + + static void packet_mm_close(struct vm_area_struct *vma) +@@ -4254,7 +4254,7 @@ static void packet_mm_close(struct vm_ar + struct sock *sk = sock->sk; + + if (sk) +- atomic_dec(&pkt_sk(sk)->mapped); ++ atomic_long_dec(&pkt_sk(sk)->mapped); + } + + static const struct vm_operations_struct packet_mmap_ops = { +@@ -4349,7 +4349,7 @@ static int packet_set_ring(struct sock * + + err = -EBUSY; + if (!closing) { +- if (atomic_read(&po->mapped)) ++ if (atomic_long_read(&po->mapped)) + goto out; + if (packet_read_pending(rb)) + goto out; +@@ -4452,7 +4452,7 @@ static int packet_set_ring(struct sock * + + err = -EBUSY; + mutex_lock(&po->pg_vec_lock); +- if (closing || atomic_read(&po->mapped) == 0) { ++ if (closing || atomic_long_read(&po->mapped) == 0) { + err = 0; + spin_lock_bh(&rb_queue->lock); + swap(rb->pg_vec, pg_vec); +@@ -4470,9 +4470,9 @@ static int packet_set_ring(struct sock * + po->prot_hook.func = (po->rx_ring.pg_vec) ? + tpacket_rcv : packet_rcv; + skb_queue_purge(rb_queue); +- if (atomic_read(&po->mapped)) +- pr_err("packet_mmap: vma is busy: %d\n", +- atomic_read(&po->mapped)); ++ if (atomic_long_read(&po->mapped)) ++ pr_err("packet_mmap: vma is busy: %ld\n", ++ atomic_long_read(&po->mapped)); + } + mutex_unlock(&po->pg_vec_lock); + +@@ -4550,7 +4550,7 @@ static int packet_mmap(struct file *file + } + } + +- atomic_inc(&po->mapped); ++ atomic_long_inc(&po->mapped); + vma->vm_ops = &packet_mmap_ops; + err = 0; + +--- a/net/packet/internal.h ++++ b/net/packet/internal.h +@@ -126,7 +126,7 @@ struct packet_sock { + __be16 num; + struct packet_rollover *rollover; + struct packet_mclist *mclist; +- atomic_t mapped; ++ atomic_long_t mapped; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; diff --git a/queue-5.15/platform-surface-aggregator-fix-recv_buf-return-value.patch b/queue-5.15/platform-surface-aggregator-fix-recv_buf-return-value.patch new file mode 100644 index 00000000000..a4b3bf46ffd --- /dev/null +++ b/queue-5.15/platform-surface-aggregator-fix-recv_buf-return-value.patch @@ -0,0 +1,49 @@ +From c8820c92caf0770bec976b01fa9e82bb993c5865 Mon Sep 17 00:00:00 2001 +From: Francesco Dolcini +Date: Tue, 28 Nov 2023 20:49:35 +0100 +Subject: platform/surface: aggregator: fix recv_buf() return value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Francesco Dolcini + +commit c8820c92caf0770bec976b01fa9e82bb993c5865 upstream. 
+ +Serdev recv_buf() callback is supposed to return the amount of bytes +consumed, therefore an int in between 0 and count. + +Do not return negative number in case of issue, when +ssam_controller_receive_buf() returns ESHUTDOWN just returns 0, e.g. no +bytes consumed, this keep the exact same behavior as it was before. + +This fixes a potential WARN in serdev-ttyport.c:ttyport_receive_buf(). + +Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") +Cc: stable@vger.kernel.org +Signed-off-by: Francesco Dolcini +Reviewed-by: Maximilian Luz +Link: https://lore.kernel.org/r/20231128194935.11350-1-francesco@dolcini.it +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/surface/aggregator/core.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/platform/surface/aggregator/core.c ++++ b/drivers/platform/surface/aggregator/core.c +@@ -230,9 +230,12 @@ static int ssam_receive_buf(struct serde + size_t n) + { + struct ssam_controller *ctrl; ++ int ret; + + ctrl = serdev_device_get_drvdata(dev); +- return ssam_controller_receive_buf(ctrl, buf, n); ++ ret = ssam_controller_receive_buf(ctrl, buf, n); ++ ++ return ret < 0 ? 0 : ret; + } + + static void ssam_write_wakeup(struct serdev_device *dev) diff --git a/queue-5.15/regmap-fix-bogus-error-on-regcache_sync-success.patch b/queue-5.15/regmap-fix-bogus-error-on-regcache_sync-success.patch new file mode 100644 index 00000000000..fea797b1cb8 --- /dev/null +++ b/queue-5.15/regmap-fix-bogus-error-on-regcache_sync-success.patch @@ -0,0 +1,45 @@ +From fea88064445a59584460f7f67d102b6e5fc1ca1d Mon Sep 17 00:00:00 2001 +From: Matthias Reichl +Date: Sun, 3 Dec 2023 23:22:16 +0100 +Subject: regmap: fix bogus error on regcache_sync success + +From: Matthias Reichl + +commit fea88064445a59584460f7f67d102b6e5fc1ca1d upstream. + +Since commit 0ec7731655de ("regmap: Ensure range selector registers +are updated after cache sync") opening pcm512x based soundcards fail +with EINVAL and dmesg shows sync cache and pm_runtime_get errors: + +[ 228.794676] pcm512x 1-004c: Failed to sync cache: -22 +[ 228.794740] pcm512x 1-004c: ASoC: error at snd_soc_pcm_component_pm_runtime_get on pcm512x.1-004c: -22 + +This is caused by the cache check result leaking out into the +regcache_sync return value. + +Fix this by making the check local-only, as the comment above the +regcache_read call states a non-zero return value means there's +nothing to do so the return value should not be altered. 
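The bug class is small enough to show in plain C (a hedged illustration, not
the regmap code; every name below is made up): a probe whose non-zero result
only means "skip this entry" must stay local to the check and must never be
written into the ret that the function finally returns.

    #include <stddef.h>

    int sync_entries(int (*probe_fn)(size_t), int (*write_fn)(size_t), size_t n)
    {
            int ret = 0;

            for (size_t i = 0; i < n; i++) {
                    /* Probe result stays local: non-zero just means "nothing cached". */
                    if (probe_fn(i) != 0)
                            continue;
                    ret = write_fn(i);      /* only real write errors may escape */
                    if (ret)
                            break;
            }
            return ret;
    }

Assigning the probe result to the outer ret before the continue is the same
shape of leak that made a successful regcache_sync() report -22 (-EINVAL) in
the log above.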
+ +Fixes: 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync") +Cc: stable@vger.kernel.org +Signed-off-by: Matthias Reichl +Link: https://lore.kernel.org/r/20231203222216.96547-1-hias@horus.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/regmap/regcache.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/base/regmap/regcache.c ++++ b/drivers/base/regmap/regcache.c +@@ -404,8 +404,7 @@ out: + rb_entry(node, struct regmap_range_node, node); + + /* If there's nothing in the cache there's nothing to sync */ +- ret = regcache_read(map, this->selector_reg, &i); +- if (ret != 0) ++ if (regcache_read(map, this->selector_reg, &i) != 0) + continue; + + ret = _regmap_write(map, this->selector_reg, i); diff --git a/queue-5.15/series b/queue-5.15/series index 54cf23525c0..295d47994a3 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -81,3 +81,13 @@ io_uring-fix-mutex_unlock-with-unreferenced-ctx.patch alsa-usb-audio-add-pioneer-djm-450-mixer-controls.patch alsa-pcm-fix-out-of-bounds-in-snd_pcm_state_names.patch alsa-hda-realtek-enable-headset-on-lenovo-m90-gen5.patch +nilfs2-fix-missing-error-check-for-sb_set_blocksize-call.patch +nilfs2-prevent-warning-in-nilfs_sufile_set_segment_usage.patch +checkstack-fix-printed-address.patch +tracing-always-update-snapshot-buffer-size.patch +tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch +tracing-fix-incomplete-locking-when-disabling-buffered-events.patch +tracing-fix-a-possible-race-when-disabling-buffered-events.patch +packet-move-reference-count-in-packet_sock-to-atomic_long_t.patch +regmap-fix-bogus-error-on-regcache_sync-success.patch +platform-surface-aggregator-fix-recv_buf-return-value.patch diff --git a/queue-5.15/tracing-always-update-snapshot-buffer-size.patch b/queue-5.15/tracing-always-update-snapshot-buffer-size.patch new file mode 100644 index 00000000000..74a0005face --- /dev/null +++ b/queue-5.15/tracing-always-update-snapshot-buffer-size.patch @@ -0,0 +1,83 @@ +From 7be76461f302ec05cbd62b90b2a05c64299ca01f Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 5 Dec 2023 16:52:09 -0500 +Subject: tracing: Always update snapshot buffer size + +From: Steven Rostedt (Google) + +commit 7be76461f302ec05cbd62b90b2a05c64299ca01f upstream. + +It use to be that only the top level instance had a snapshot buffer (for +latency tracers like wakeup and irqsoff). The update of the ring buffer +size would check if the instance was the top level and if so, it would +also update the snapshot buffer as it needs to be the same as the main +buffer. + +Now that lower level instances also has a snapshot buffer, they too need +to update their snapshot buffer sizes when the main buffer is changed, +otherwise the following can be triggered: + + # cd /sys/kernel/tracing + # echo 1500 > buffer_size_kb + # mkdir instances/foo + # echo irqsoff > instances/foo/current_tracer + # echo 1000 > instances/foo/buffer_size_kb + +Produces: + + WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320 + +Which is: + + ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); + + if (ret == -EBUSY) { + [..] + } + + WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); <== here + +That's because ring_buffer_swap_cpu() has: + + int ret = -EINVAL; + + [..] 
+ + /* At least make sure the two buffers are somewhat the same */ + if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) + goto out; + + [..] + out: + return ret; + } + +Instead, update all instances' snapshot buffer sizes when their main +buffer size is updated. + +Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6281,8 +6281,7 @@ static int __tracing_resize_ring_buffer( + return ret; + + #ifdef CONFIG_TRACER_MAX_TRACE +- if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || +- !tr->current_trace->use_max_tr) ++ if (!tr->current_trace->use_max_tr) + goto out; + + ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); diff --git a/queue-5.15/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch b/queue-5.15/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch new file mode 100644 index 00000000000..a6486245732 --- /dev/null +++ b/queue-5.15/tracing-disable-snapshot-buffer-when-stopping-instance-tracers.patch @@ -0,0 +1,203 @@ +From b538bf7d0ec11ca49f536dfda742a5f6db90a798 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 5 Dec 2023 16:52:11 -0500 +Subject: tracing: Disable snapshot buffer when stopping instance tracers + +From: Steven Rostedt (Google) + +commit b538bf7d0ec11ca49f536dfda742a5f6db90a798 upstream. + +It use to be that only the top level instance had a snapshot buffer (for +latency tracers like wakeup and irqsoff). When stopping a tracer in an +instance would not disable the snapshot buffer. This could have some +unintended consequences if the irqsoff tracer is enabled. + +Consolidate the tracing_start/stop() with tracing_start/stop_tr() so that +all instances behave the same. The tracing_start/stop() functions will +just call their respective tracing_start/stop_tr() with the global_array +passed in. + +Link: https://lkml.kernel.org/r/20231205220011.041220035@goodmis.org + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 110 +++++++++++++++------------------------------------ + 1 file changed, 34 insertions(+), 76 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2284,13 +2284,7 @@ int is_tracing_stopped(void) + return global_trace.stop_count; + } + +-/** +- * tracing_start - quick start of the tracer +- * +- * If tracing is enabled but was stopped by tracing_stop, +- * this will start the tracer back up. 
+- */ +-void tracing_start(void) ++static void tracing_start_tr(struct trace_array *tr) + { + struct trace_buffer *buffer; + unsigned long flags; +@@ -2298,119 +2292,83 @@ void tracing_start(void) + if (tracing_disabled) + return; + +- raw_spin_lock_irqsave(&global_trace.start_lock, flags); +- if (--global_trace.stop_count) { +- if (global_trace.stop_count < 0) { ++ raw_spin_lock_irqsave(&tr->start_lock, flags); ++ if (--tr->stop_count) { ++ if (WARN_ON_ONCE(tr->stop_count < 0)) { + /* Someone screwed up their debugging */ +- WARN_ON_ONCE(1); +- global_trace.stop_count = 0; ++ tr->stop_count = 0; + } + goto out; + } + + /* Prevent the buffers from switching */ +- arch_spin_lock(&global_trace.max_lock); ++ arch_spin_lock(&tr->max_lock); + +- buffer = global_trace.array_buffer.buffer; ++ buffer = tr->array_buffer.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + #ifdef CONFIG_TRACER_MAX_TRACE +- buffer = global_trace.max_buffer.buffer; ++ buffer = tr->max_buffer.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + #endif + +- arch_spin_unlock(&global_trace.max_lock); +- +- out: +- raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); +-} +- +-static void tracing_start_tr(struct trace_array *tr) +-{ +- struct trace_buffer *buffer; +- unsigned long flags; +- +- if (tracing_disabled) +- return; +- +- /* If global, we need to also start the max tracer */ +- if (tr->flags & TRACE_ARRAY_FL_GLOBAL) +- return tracing_start(); +- +- raw_spin_lock_irqsave(&tr->start_lock, flags); +- +- if (--tr->stop_count) { +- if (tr->stop_count < 0) { +- /* Someone screwed up their debugging */ +- WARN_ON_ONCE(1); +- tr->stop_count = 0; +- } +- goto out; +- } +- +- buffer = tr->array_buffer.buffer; +- if (buffer) +- ring_buffer_record_enable(buffer); ++ arch_spin_unlock(&tr->max_lock); + + out: + raw_spin_unlock_irqrestore(&tr->start_lock, flags); + } + + /** +- * tracing_stop - quick stop of the tracer ++ * tracing_start - quick start of the tracer + * +- * Light weight way to stop tracing. Use in conjunction with +- * tracing_start. ++ * If tracing is enabled but was stopped by tracing_stop, ++ * this will start the tracer back up. + */ +-void tracing_stop(void) ++void tracing_start(void) ++ ++{ ++ return tracing_start_tr(&global_trace); ++} ++ ++static void tracing_stop_tr(struct trace_array *tr) + { + struct trace_buffer *buffer; + unsigned long flags; + +- raw_spin_lock_irqsave(&global_trace.start_lock, flags); +- if (global_trace.stop_count++) ++ raw_spin_lock_irqsave(&tr->start_lock, flags); ++ if (tr->stop_count++) + goto out; + + /* Prevent the buffers from switching */ +- arch_spin_lock(&global_trace.max_lock); ++ arch_spin_lock(&tr->max_lock); + +- buffer = global_trace.array_buffer.buffer; ++ buffer = tr->array_buffer.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + #ifdef CONFIG_TRACER_MAX_TRACE +- buffer = global_trace.max_buffer.buffer; ++ buffer = tr->max_buffer.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + #endif + +- arch_spin_unlock(&global_trace.max_lock); ++ arch_spin_unlock(&tr->max_lock); + + out: +- raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); ++ raw_spin_unlock_irqrestore(&tr->start_lock, flags); + } + +-static void tracing_stop_tr(struct trace_array *tr) ++/** ++ * tracing_stop - quick stop of the tracer ++ * ++ * Light weight way to stop tracing. Use in conjunction with ++ * tracing_start. 
++ */ ++void tracing_stop(void) + { +- struct trace_buffer *buffer; +- unsigned long flags; +- +- /* If global, we need to also stop the max tracer */ +- if (tr->flags & TRACE_ARRAY_FL_GLOBAL) +- return tracing_stop(); +- +- raw_spin_lock_irqsave(&tr->start_lock, flags); +- if (tr->stop_count++) +- goto out; +- +- buffer = tr->array_buffer.buffer; +- if (buffer) +- ring_buffer_record_disable(buffer); +- +- out: +- raw_spin_unlock_irqrestore(&tr->start_lock, flags); ++ return tracing_stop_tr(&global_trace); + } + + static int trace_save_cmdline(struct task_struct *tsk) diff --git a/queue-5.15/tracing-fix-a-possible-race-when-disabling-buffered-events.patch b/queue-5.15/tracing-fix-a-possible-race-when-disabling-buffered-events.patch new file mode 100644 index 00000000000..9ef4b8c04da --- /dev/null +++ b/queue-5.15/tracing-fix-a-possible-race-when-disabling-buffered-events.patch @@ -0,0 +1,82 @@ +From c0591b1cccf708a47bc465c62436d669a4213323 Mon Sep 17 00:00:00 2001 +From: Petr Pavlu +Date: Tue, 5 Dec 2023 17:17:36 +0100 +Subject: tracing: Fix a possible race when disabling buffered events + +From: Petr Pavlu + +commit c0591b1cccf708a47bc465c62436d669a4213323 upstream. + +Function trace_buffered_event_disable() is responsible for freeing pages +backing buffered events and this process can run concurrently with +trace_event_buffer_lock_reserve(). + +The following race is currently possible: + +* Function trace_buffered_event_disable() is called on CPU 0. It + increments trace_buffered_event_cnt on each CPU and waits via + synchronize_rcu() for each user of trace_buffered_event to complete. + +* After synchronize_rcu() is finished, function + trace_buffered_event_disable() has the exclusive access to + trace_buffered_event. All counters trace_buffered_event_cnt are at 1 + and all pointers trace_buffered_event are still valid. + +* At this point, on a different CPU 1, the execution reaches + trace_event_buffer_lock_reserve(). The function calls + preempt_disable_notrace() and only now enters an RCU read-side + critical section. The function proceeds and reads a still valid + pointer from trace_buffered_event[CPU1] into the local variable + "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1] + which happens later. + +* Function trace_buffered_event_disable() continues. It frees + trace_buffered_event[CPU1] and decrements + trace_buffered_event_cnt[CPU1] back to 0. + +* Function trace_event_buffer_lock_reserve() continues. It reads and + increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it + believe that it can use the "entry" that it already obtained but the + pointer is now invalid and any access results in a use-after-free. + +Fix the problem by making a second synchronize_rcu() call after all +trace_buffered_event values are set to NULL. This waits on all potential +users in trace_event_buffer_lock_reserve() that still read a previous +pointer from trace_buffered_event. 
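Condensed, the ordering that the added grace period enforces looks like this
(a hedged paraphrase, not the exact kernel code: the per-CPU variables and
primitives are the real ones from kernel/trace/trace.c, while reader_side(),
writer_teardown() and use_buffered_event() are invented for illustration).

    static void reader_side(void)           /* preemption off = RCU read side */
    {
            struct ring_buffer_event *entry;

            entry = this_cpu_read(trace_buffered_event);        /* step 1: pointer */
            if (entry && this_cpu_inc_return(trace_buffered_event_cnt) == 1)
                    use_buffered_event(entry);  /* step 2: counter; 'entry' must still be valid */
    }

    static void writer_teardown(void)
    {
            int cpu;

            for_each_tracing_cpu(cpu)
                    per_cpu(trace_buffered_event, cpu) = NULL;  /* unpublish */
            synchronize_rcu();  /* wait out readers that already did step 1 */
            /* only now may trace_buffered_event_cnt drop back to zero */
    }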
+ +Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ +Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com + +Cc: stable@vger.kernel.org +Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") +Signed-off-by: Petr Pavlu +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2713,13 +2713,17 @@ void trace_buffered_event_disable(void) + free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); + per_cpu(trace_buffered_event, cpu) = NULL; + } ++ + /* +- * Make sure trace_buffered_event is NULL before clearing +- * trace_buffered_event_cnt. ++ * Wait for all CPUs that potentially started checking if they can use ++ * their event buffer only after the previous synchronize_rcu() call and ++ * they still read a valid pointer from trace_buffered_event. It must be ++ * ensured they don't see cleared trace_buffered_event_cnt else they ++ * could wrongly decide to use the pointed-to buffer which is now freed. + */ +- smp_wmb(); ++ synchronize_rcu(); + +- /* Do the work on each cpu */ ++ /* For each CPU, relinquish the buffer */ + on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, + true); + } diff --git a/queue-5.15/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch b/queue-5.15/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch new file mode 100644 index 00000000000..f22a2eeeb03 --- /dev/null +++ b/queue-5.15/tracing-fix-incomplete-locking-when-disabling-buffered-events.patch @@ -0,0 +1,153 @@ +From 7fed14f7ac9cf5e38c693836fe4a874720141845 Mon Sep 17 00:00:00 2001 +From: Petr Pavlu +Date: Tue, 5 Dec 2023 17:17:34 +0100 +Subject: tracing: Fix incomplete locking when disabling buffered events + +From: Petr Pavlu + +commit 7fed14f7ac9cf5e38c693836fe4a874720141845 upstream. + +The following warning appears when using buffered events: + +[ 203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420 +[...] +[ 203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G E 6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a +[ 203.670704] Hardware name: Intel Corp. 
GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 +[ 203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420 +[ 203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff +[ 203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202 +[ 203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000 +[ 203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400 +[ 203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000 +[ 203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 +[ 203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008 +[ 203.781846] FS: 00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000 +[ 203.781851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0 +[ 203.781862] Call Trace: +[ 203.781870] +[ 203.851949] trace_event_buffer_commit+0x1ea/0x250 +[ 203.851967] trace_event_raw_event_sys_enter+0x83/0xe0 +[ 203.851983] syscall_trace_enter.isra.0+0x182/0x1a0 +[ 203.851990] do_syscall_64+0x3a/0xe0 +[ 203.852075] entry_SYSCALL_64_after_hwframe+0x6e/0x76 +[ 203.852090] RIP: 0033:0x7f4cd870fa77 +[ 203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48 +[ 203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089 +[ 203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77 +[ 203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0 +[ 203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0 +[ 203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40 +[ 204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0 +[ 204.049256] + +For instance, it can be triggered by running these two commands in +parallel: + + $ while true; do + echo hist:key=id.syscall:val=hitcount > \ + /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger; + done + $ stress-ng --sysinfo $(nproc) + +The warning indicates that the current ring_buffer_per_cpu is not in the +committing state. It happens because the active ring_buffer_event +doesn't actually come from the ring_buffer_per_cpu but is allocated from +trace_buffered_event. + +The bug is in function trace_buffered_event_disable() where the +following normally happens: + +* The code invokes disable_trace_buffered_event() via + smp_call_function_many() and follows it by synchronize_rcu(). This + increments the per-CPU variable trace_buffered_event_cnt on each + target CPU and grants trace_buffered_event_disable() the exclusive + access to the per-CPU variable trace_buffered_event. + +* Maintenance is performed on trace_buffered_event, all per-CPU event + buffers get freed. + +* The code invokes enable_trace_buffered_event() via + smp_call_function_many(). This decrements trace_buffered_event_cnt and + releases the access to trace_buffered_event. + +A problem is that smp_call_function_many() runs a given function on all +target CPUs except on the current one. The following can then occur: + +* Task X executing trace_buffered_event_disable() runs on CPU 0. 
+ +* The control reaches synchronize_rcu() and the task gets rescheduled on + another CPU 1. + +* The RCU synchronization finishes. At this point, + trace_buffered_event_disable() has the exclusive access to all + trace_buffered_event variables except trace_buffered_event[CPU0] + because trace_buffered_event_cnt[CPU0] is never incremented and if the + buffer is currently unused, remains set to 0. + +* A different task Y is scheduled on CPU 0 and hits a trace event. The + code in trace_event_buffer_lock_reserve() sees that + trace_buffered_event_cnt[CPU0] is set to 0 and decides the use the + buffer provided by trace_buffered_event[CPU0]. + +* Task X continues its execution in trace_buffered_event_disable(). The + code incorrectly frees the event buffer pointed by + trace_buffered_event[CPU0] and resets the variable to NULL. + +* Task Y writes event data to the now freed buffer and later detects the + created inconsistency. + +The issue is observable since commit dea499781a11 ("tracing: Fix warning +in trace_buffered_event_disable()") which moved the call of +trace_buffered_event_disable() in __ftrace_event_enable_disable() +earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..). +The underlying problem in trace_buffered_event_disable() is however +present since the original implementation in commit 0fc1b09ff1ff +("tracing: Use temp buffer when filtering events"). + +Fix the problem by replacing the two smp_call_function_many() calls with +on_each_cpu_mask() which invokes a given callback on all CPUs. + +Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ +Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com + +Cc: stable@vger.kernel.org +Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") +Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") +Signed-off-by: Petr Pavlu +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2702,11 +2702,9 @@ void trace_buffered_event_disable(void) + if (--trace_buffered_event_ref) + return; + +- preempt_disable(); + /* For each CPU, set the buffer as used. */ +- smp_call_function_many(tracing_buffer_mask, +- disable_trace_buffered_event, NULL, 1); +- preempt_enable(); ++ on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, ++ NULL, true); + + /* Wait for all current users to finish */ + synchronize_rcu(); +@@ -2721,11 +2719,9 @@ void trace_buffered_event_disable(void) + */ + smp_wmb(); + +- preempt_disable(); + /* Do the work on each cpu */ +- smp_call_function_many(tracing_buffer_mask, +- enable_trace_buffered_event, NULL, 1); +- preempt_enable(); ++ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, ++ true); + } + + static struct trace_buffer *temp_buffer; -- 2.47.3
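The distinction the last fix turns on is narrow enough to show directly (a
hedged sketch: the bump() callback is illustrative, while
smp_call_function_many(), on_each_cpu_mask(), this_cpu_inc() and
tracing_buffer_mask are the real kernel interfaces the patch uses).

    static void bump(void *info)
    {
            this_cpu_inc(trace_buffered_event_cnt);
    }

    /* Runs bump() on every CPU in the mask *except* the calling one,
     * and needs preemption disabled around the call: */
    preempt_disable();
    smp_call_function_many(tracing_buffer_mask, bump, NULL, 1);
    preempt_enable();

    /* Runs bump() on every CPU in the mask, the calling CPU included,
     * so the current CPU's counter can no longer be left at zero: */
    on_each_cpu_mask(tracing_buffer_mask, bump, NULL, true);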