From fd4dccd3335d705344e578397c07c93e4002257f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Dec 2023 08:40:03 +0100 Subject: [PATCH] 5.10-stable patches added patches: ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch ring-buffer-fix-memory-leak-of-free-page.patch ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch ring-buffer-have-saved-event-hold-the-entire-event.patch tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch --- ...-in-rb_time_cmpxchg-for-32-bit-archs.patch | 74 ++++++++++++++++ ...-buffer-fix-memory-leak-of-free-page.patch | 48 ++++++++++ ...ing-to-the-buffer-with-max_data_size.patch | 87 +++++++++++++++++++ ...ve-saved-event-hold-the-entire-event.patch | 54 ++++++++++++ queue-5.10/series | 5 ++ ...-buffer-on-resize-if-it-is-allocated.patch | 59 +++++++++++++ 6 files changed, 327 insertions(+) create mode 100644 queue-5.10/ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch create mode 100644 queue-5.10/ring-buffer-fix-memory-leak-of-free-page.patch create mode 100644 queue-5.10/ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch create mode 100644 queue-5.10/ring-buffer-have-saved-event-hold-the-entire-event.patch create mode 100644 queue-5.10/tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch diff --git a/queue-5.10/ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch b/queue-5.10/ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch new file mode 100644 index 00000000000..570f37e2dc0 --- /dev/null +++ b/queue-5.10/ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch @@ -0,0 +1,74 @@ +From fff88fa0fbc7067ba46dde570912d63da42c59a9 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 12 Dec 2023 11:53:01 -0500 +Subject: ring-buffer: Fix a race in rb_time_cmpxchg() for 32 bit archs + +From: Steven Rostedt (Google) + +commit fff88fa0fbc7067ba46dde570912d63da42c59a9 upstream. + +Mathieu Desnoyers pointed out an issue in the rb_time_cmpxchg() for 32 bit +architectures. That is: + + static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) + { + unsigned long cnt, top, bottom, msb; + unsigned long cnt2, top2, bottom2, msb2; + u64 val; + + /* The cmpxchg always fails if it interrupted an update */ + if (!__rb_time_read(t, &val, &cnt2)) + return false; + + if (val != expect) + return false; + +<<<< interrupted here! + + cnt = local_read(&t->cnt); + +The problem is that the synchronization counter in the rb_time_t is read +*after* the value of the timestamp is read. That means if an interrupt +were to come in between the value being read and the counter being read, +it can change the value and the counter and the interrupted process would +be clueless about it! + +The counter needs to be read first and then the value. That way it is easy +to tell if the value is stale or not. If the counter hasn't been updated, +then the value is still good. + +Link: https://lore.kernel.org/linux-trace-kernel/20231211201324.652870-1-mathieu.desnoyers@efficios.com/ +Link: https://lore.kernel.org/linux-trace-kernel/20231212115301.7a9c9a64@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Fixes: 10464b4aa605e ("ring-buffer: Add rb_time_t 64 bit operations for speeding up 32 bit") +Reported-by: Mathieu Desnoyers +Reviewed-by: Mathieu Desnoyers +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -703,6 +703,9 @@ static int rb_time_cmpxchg(rb_time_t *t, + unsigned long cnt2, top2, bottom2; + u64 val; + ++ /* Any interruptions in this function should cause a failure */ ++ cnt = local_read(&t->cnt); ++ + /* The cmpxchg always fails if it interrupted an update */ + if (!__rb_time_read(t, &val, &cnt2)) + return false; +@@ -710,7 +713,6 @@ static int rb_time_cmpxchg(rb_time_t *t, + if (val != expect) + return false; + +- cnt = local_read(&t->cnt); + if ((cnt & 3) != cnt2) + return false; + diff --git a/queue-5.10/ring-buffer-fix-memory-leak-of-free-page.patch b/queue-5.10/ring-buffer-fix-memory-leak-of-free-page.patch new file mode 100644 index 00000000000..064fddafb20 --- /dev/null +++ b/queue-5.10/ring-buffer-fix-memory-leak-of-free-page.patch @@ -0,0 +1,48 @@ +From 17d801758157bec93f26faaf5ff1a8b9a552d67a Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Sun, 10 Dec 2023 22:12:50 -0500 +Subject: ring-buffer: Fix memory leak of free page + +From: Steven Rostedt (Google) + +commit 17d801758157bec93f26faaf5ff1a8b9a552d67a upstream. + +Reading the ring buffer does a swap of a sub-buffer within the ring buffer +with a empty sub-buffer. This allows the reader to have full access to the +content of the sub-buffer that was swapped out without having to worry +about contention with the writer. + +The readers call ring_buffer_alloc_read_page() to allocate a page that +will be used to swap with the ring buffer. When the code is finished with +the reader page, it calls ring_buffer_free_read_page(). Instead of freeing +the page, it stores it as a spare. Then next call to +ring_buffer_alloc_read_page() will return this spare instead of calling +into the memory management system to allocate a new page. + +Unfortunately, on freeing of the ring buffer, this spare page is not +freed, and causes a memory leak. + +Link: https://lore.kernel.org/linux-trace-kernel/20231210221250.7b9cc83c@rorschach.local.home + +Cc: stable@vger.kernel.org +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") +Acked-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -1667,6 +1667,8 @@ static void rb_free_cpu_buffer(struct ri + free_buffer_page(bpage); + } + ++ free_page((unsigned long)cpu_buffer->free_page); ++ + kfree(cpu_buffer); + } + diff --git a/queue-5.10/ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch b/queue-5.10/ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch new file mode 100644 index 00000000000..8f47ce207fe --- /dev/null +++ b/queue-5.10/ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch @@ -0,0 +1,87 @@ +From b3ae7b67b87fed771fa5bf95389df06b0433603e Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 12 Dec 2023 11:16:17 -0500 +Subject: ring-buffer: Fix writing to the buffer with max_data_size + +From: Steven Rostedt (Google) + +commit b3ae7b67b87fed771fa5bf95389df06b0433603e upstream. + +The maximum ring buffer data size is the maximum size of data that can be +recorded on the ring buffer. Events must be smaller than the sub buffer +data size minus any meta data. This size is checked before trying to +allocate from the ring buffer because the allocation assumes that the size +will fit on the sub buffer. + +The maximum size was calculated as the size of a sub buffer page (which is +currently PAGE_SIZE minus the sub buffer header) minus the size of the +meta data of an individual event. But it missed the possible adding of a +time stamp for events that are added long enough apart that the event meta +data can't hold the time delta. + +When an event is added that is greater than the current BUF_MAX_DATA_SIZE +minus the size of a time stamp, but still less than or equal to +BUF_MAX_DATA_SIZE, the ring buffer would go into an infinite loop, looking +for a page that can hold the event. Luckily, there's a check for this loop +and after 1000 iterations and a warning is emitted and the ring buffer is +disabled. But this should never happen. + +This can happen when a large event is added first, or after a long period +where an absolute timestamp is prefixed to the event, increasing its size +by 8 bytes. This passes the check and then goes into the algorithm that +causes the infinite loop. + +For events that are the first event on the sub-buffer, it does not need to +add a timestamp, because the sub-buffer itself contains an absolute +timestamp, and adding one is redundant. + +The fix is to check if the event is to be the first event on the +sub-buffer, and if it is, then do not add a timestamp. + +This also fixes 32 bit adding a timestamp when a read of before_stamp or +write_stamp is interrupted. There's still no need to add that timestamp if +the event is going to be the first event on the sub buffer. + +Also, if the buffer has "time_stamp_abs" set, then also check if the +length plus the timestamp is greater than the BUF_MAX_DATA_SIZE. + +Link: https://lore.kernel.org/all/20231212104549.58863438@gandalf.local.home/ +Link: https://lore.kernel.org/linux-trace-kernel/20231212071837.5fdd6c13@gandalf.local.home +Link: https://lore.kernel.org/linux-trace-kernel/20231212111617.39e02849@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Fixes: a4543a2fa9ef3 ("ring-buffer: Get timestamp after event is allocated") +Fixes: 58fbc3c63275c ("ring-buffer: Consolidate add_timestamp to remove some branches") +Reported-by: Kent Overstreet # (on IRC) +Acked-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3301,7 +3301,10 @@ __rb_reserve_next(struct ring_buffer_per + * absolute timestamp. + * Don't bother if this is the start of a new page (w == 0). + */ +- if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) { ++ if (!w) { ++ /* Use the sub-buffer timestamp */ ++ info->delta = 0; ++ } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) { + info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; + info->length += RB_LEN_TIME_EXTEND; + } else { +@@ -3456,6 +3459,8 @@ rb_reserve_next_event(struct trace_buffe + if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { + add_ts_default = RB_ADD_STAMP_ABSOLUTE; + info.length += RB_LEN_TIME_EXTEND; ++ if (info.length > BUF_MAX_DATA_SIZE) ++ goto out_fail; + } else { + add_ts_default = RB_ADD_STAMP_NONE; + } diff --git a/queue-5.10/ring-buffer-have-saved-event-hold-the-entire-event.patch b/queue-5.10/ring-buffer-have-saved-event-hold-the-entire-event.patch new file mode 100644 index 00000000000..097c5e87682 --- /dev/null +++ b/queue-5.10/ring-buffer-have-saved-event-hold-the-entire-event.patch @@ -0,0 +1,54 @@ +From b049525855fdd0024881c9b14b8fbec61c3f53d3 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 12 Dec 2023 07:25:58 -0500 +Subject: ring-buffer: Have saved event hold the entire event + +From: Steven Rostedt (Google) + +commit b049525855fdd0024881c9b14b8fbec61c3f53d3 upstream. + +For the ring buffer iterator (non-consuming read), the event needs to be +copied into the iterator buffer to make sure that a writer does not +overwrite it while the user is reading it. If a write happens during the +copy, the buffer is simply discarded. + +But the temp buffer itself was not big enough. The allocation of the +buffer was only BUF_MAX_DATA_SIZE, which is the maximum data size that can +be passed into the ring buffer and saved. But the temp buffer needs to +hold the meta data as well. That would be BUF_PAGE_SIZE and not +BUF_MAX_DATA_SIZE. + +Link: https://lore.kernel.org/linux-trace-kernel/20231212072558.61f76493@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Fixes: 785888c544e04 ("ring-buffer: Have rb_iter_head_event() handle concurrent writer") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -2275,7 +2275,7 @@ rb_iter_head_event(struct ring_buffer_it + */ + barrier(); + +- if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) ++ if ((iter->head + length) > commit || length > BUF_PAGE_SIZE) + /* Writer corrupted the read? */ + goto reset; + +@@ -4836,7 +4836,8 @@ ring_buffer_read_prepare(struct trace_bu + if (!iter) + return NULL; + +- iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags); ++ /* Holds the entire event: data and meta data */ ++ iter->event = kmalloc(BUF_PAGE_SIZE, flags); + if (!iter->event) { + kfree(iter); + return NULL; diff --git a/queue-5.10/series b/queue-5.10/series index e82b91efe3d..97bd82e8a5b 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -59,3 +59,8 @@ soundwire-stream-fix-null-pointer-dereference-for-multi_link.patch ext4-prevent-the-normalized-size-from-exceeding-ext_max_blocks.patch arm64-mm-always-make-sw-dirty-ptes-hw-dirty-in-pte_modify.patch team-fix-use-after-free-when-an-option-instance-allocation-fails.patch +ring-buffer-fix-memory-leak-of-free-page.patch +tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch +ring-buffer-have-saved-event-hold-the-entire-event.patch +ring-buffer-fix-writing-to-the-buffer-with-max_data_size.patch +ring-buffer-fix-a-race-in-rb_time_cmpxchg-for-32-bit-archs.patch diff --git a/queue-5.10/tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch b/queue-5.10/tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch new file mode 100644 index 00000000000..beb1621a92c --- /dev/null +++ b/queue-5.10/tracing-update-snapshot-buffer-on-resize-if-it-is-allocated.patch @@ -0,0 +1,59 @@ +From d06aff1cb13d2a0d52b48e605462518149c98c81 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Sun, 10 Dec 2023 22:54:47 -0500 +Subject: tracing: Update snapshot buffer on resize if it is allocated + +From: Steven Rostedt (Google) + +commit d06aff1cb13d2a0d52b48e605462518149c98c81 upstream. + +The snapshot buffer is to mimic the main buffer so that when a snapshot is +needed, the snapshot and main buffer are swapped. When the snapshot buffer +is allocated, it is set to the minimal size that the ring buffer may be at +and still functional. When it is allocated it becomes the same size as the +main ring buffer, and when the main ring buffer changes in size, it should +do. + +Currently, the resize only updates the snapshot buffer if it's used by the +current tracer (ie. the preemptirqsoff tracer). But it needs to be updated +anytime it is allocated. + +When changing the size of the main buffer, instead of looking to see if +the current tracer is utilizing the snapshot buffer, just check if it is +allocated to know if it should be updated or not. + +Also fix typo in comment just above the code change. + +Link: https://lore.kernel.org/linux-trace-kernel/20231210225447.48476a6a@rorschach.local.home + +Cc: stable@vger.kernel.org +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Fixes: ad909e21bbe69 ("tracing: Add internal tracing_snapshot() functions") +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5905,7 +5905,7 @@ static int __tracing_resize_ring_buffer( + if (!tr->array_buffer.buffer) + return 0; + +- /* Do not allow tracing while resizng ring buffer */ ++ /* Do not allow tracing while resizing ring buffer */ + tracing_stop_tr(tr); + + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); +@@ -5913,7 +5913,7 @@ static int __tracing_resize_ring_buffer( + goto out_start; + + #ifdef CONFIG_TRACER_MAX_TRACE +- if (!tr->current_trace->use_max_tr) ++ if (!tr->allocated_snapshot) + goto out; + + ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); -- 2.47.3