From 8fd763d54475ec924fde5fa440b65e9bd4edc831 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Jan 2024 11:28:53 +0100 Subject: [PATCH] 5.15-stable patches added patches: dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch ring-buffer-fix-slowpath-of-interrupted-event.patch --- ...utable-bio_vec-in-integrity_metadata.patch | 66 +++++ ...et-commit-for-deleted-destroyed-sets.patch | 33 +++ ...er-fix-slowpath-of-interrupted-event.patch | 272 ++++++++++++++++++ queue-5.15/series | 3 + 4 files changed, 374 insertions(+) create mode 100644 queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch create mode 100644 queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch create mode 100644 queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch diff --git a/queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch b/queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch new file mode 100644 index 00000000000..cc9a00fe7f0 --- /dev/null +++ b/queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch @@ -0,0 +1,66 @@ +From b86f4b790c998afdbc88fe1aa55cfe89c4068726 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Tue, 5 Dec 2023 16:39:16 +0100 +Subject: dm-integrity: don't modify bio's immutable bio_vec in integrity_metadata() + +From: Mikulas Patocka + +commit b86f4b790c998afdbc88fe1aa55cfe89c4068726 upstream. + +__bio_for_each_segment assumes that the first struct bio_vec argument +doesn't change - it calls "bio_advance_iter_single((bio), &(iter), +(bvl).bv_len)" to advance the iterator. Unfortunately, the dm-integrity +code changes the bio_vec with "bv.bv_len -= pos". When this code path +is taken, the iterator would be out of sync and dm-integrity would +report errors. This happens if the machine is out of memory and +"kmalloc" fails. + +Fix this bug by making a copy of "bv" and changing the copy instead. 
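+
+For context, a condensed sketch of the iterator macro quoted above, roughly
+as it appears in include/linux/bio.h in this era (shown only to illustrate
+why the loop variable must stay untouched, not as a verbatim copy):
+
+	#define __bio_for_each_segment(bvl, bio, iter, start)		\
+		for (iter = (start);					\
+		     (iter).bi_size &&					\
+			((bvl = bio_iter_iovec((bio), (iter))), 1);	\
+		     bio_advance_iter_single((bio), &(iter), (bvl).bv_len))
+
+Because the advance step consumes (bvl).bv_len, shrinking bv.bv_len inside
+the loop body makes the iterator advance by fewer bytes than the segment
+actually holds, so the next iteration re-reads part of the same segment and
+the iterator falls out of sync with the data already checksummed. Working
+on a local struct bio_vec copy, as the hunk below does, leaves the advance
+step intact.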
+ +Fixes: 7eada909bfd7 ("dm: add integrity target") +Cc: stable@vger.kernel.org # v4.12+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-integrity.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -1762,11 +1762,12 @@ static void integrity_metadata(struct wo + sectors_to_process = dio->range.n_sectors; + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { ++ struct bio_vec bv_copy = bv; + unsigned pos; + char *mem, *checksums_ptr; + + again: +- mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset; ++ mem = (char *)kmap_atomic(bv_copy.bv_page) + bv_copy.bv_offset; + pos = 0; + checksums_ptr = checksums; + do { +@@ -1775,7 +1776,7 @@ again: + sectors_to_process -= ic->sectors_per_block; + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; +- } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); ++ } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); + kunmap_atomic(mem); + + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, +@@ -1796,9 +1797,9 @@ again: + if (!sectors_to_process) + break; + +- if (unlikely(pos < bv.bv_len)) { +- bv.bv_offset += pos; +- bv.bv_len -= pos; ++ if (unlikely(pos < bv_copy.bv_len)) { ++ bv_copy.bv_offset += pos; ++ bv_copy.bv_len -= pos; + goto again; + } + } diff --git a/queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch b/queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch new file mode 100644 index 00000000000..6fa10ad02ec --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch @@ -0,0 +1,33 @@ +From 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Tue, 19 Dec 2023 19:44:49 +0100 +Subject: netfilter: nf_tables: skip set commit for deleted/destroyed sets + +From: Pablo Neira Ayuso + +commit 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a upstream. + +NFT_MSG_DELSET deactivates all elements in the set, skip +set->ops->commit() to avoid the unnecessary clone (for the pipapo case) +as well as the sync GC cycle, which could deactivate again expired +elements in such set. 
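+
+For reference, a condensed view of the commit-phase walk after this change,
+paraphrasing the one-line hunk below together with its surrounding function
+(not a verbatim copy of nf_tables_api.c):
+
+	static void nft_set_commit_update(struct list_head *set_update_list)
+	{
+		struct nft_set *set, *next;
+
+		list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+			list_del_init(&set->pending_update);
+
+			/*
+			 * A set queued for NFT_MSG_DELSET (or already
+			 * destroyed) is marked dead: committing it would
+			 * clone the pipapo backend and run a sync GC pass
+			 * for no benefit, and that GC could deactivate
+			 * already-expired elements a second time.
+			 */
+			if (!set->ops->commit || set->dead)
+				continue;
+
+			set->ops->commit(set);
+		}
+	}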
+ +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Reported-by: Kevin Rich +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9256,7 +9256,7 @@ static void nft_set_commit_update(struct + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + +- if (!set->ops->commit) ++ if (!set->ops->commit || set->dead) + continue; + + set->ops->commit(set); diff --git a/queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch b/queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch new file mode 100644 index 00000000000..0928f989ad2 --- /dev/null +++ b/queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch @@ -0,0 +1,272 @@ +From b803d7c664d55705831729d2f2e29c874bcd62ea Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Mon, 18 Dec 2023 23:07:12 -0500 +Subject: ring-buffer: Fix slowpath of interrupted event + +From: Steven Rostedt (Google) + +commit b803d7c664d55705831729d2f2e29c874bcd62ea upstream. + +To synchronize the timestamps with the ring buffer reservation, there are +two timestamps that are saved in the buffer meta data. + +1. before_stamp +2. write_stamp + +When the two are equal, the write_stamp is considered valid, as in, it may +be used to calculate the delta of the next event as the write_stamp is the +timestamp of the previous reserved event on the buffer. + +This is done by the following: + + /*A*/ w = current position on the ring buffer + before = before_stamp + after = write_stamp + ts = read current timestamp + + if (before != after) { + write_stamp is not valid, force adding an absolute + timestamp. + } + + /*B*/ before_stamp = ts + + /*C*/ write = local_add_return(event length, position on ring buffer) + + if (w == write - event length) { + /* Nothing interrupted between A and C */ + /*E*/ write_stamp = ts; + delta = ts - after + /* + * If nothing interrupted again, + * before_stamp == write_stamp and write_stamp + * can be used to calculate the delta for + * events that come in after this one. + */ + } else { + + /* + * The slow path! + * Was interrupted between A and C. + */ + +This is the place that there's a bug. We currently have: + + after = write_stamp + ts = read current timestamp + + /*F*/ if (write == current position on the ring buffer && + after < ts && cmpxchg(write_stamp, after, ts)) { + + delta = ts - after; + + } else { + delta = 0; + } + +The assumption is that if the current position on the ring buffer hasn't +moved between C and F, then it also was not interrupted, and that the last +event written has a timestamp that matches the write_stamp. That is the +write_stamp is valid. + +But this may not be the case: + +If a task context event was interrupted by softirq between B and C. + +And the softirq wrote an event that got interrupted by a hard irq between +C and E. 
+ +and the hard irq wrote an event (does not need to be interrupted) + +We have: + + /*B*/ before_stamp = ts of normal context + + ---> interrupted by softirq + + /*B*/ before_stamp = ts of softirq context + + ---> interrupted by hardirq + + /*B*/ before_stamp = ts of hard irq context + /*E*/ write_stamp = ts of hard irq context + + /* matches and write_stamp valid */ + <---- + + /*E*/ write_stamp = ts of softirq context + + /* No longer matches before_stamp, write_stamp is not valid! */ + + <--- + + w != write - length, go to slow path + +// Right now the order of events in the ring buffer is: +// +// |-- softirq event --|-- hard irq event --|-- normal context event --| +// + + after = write_stamp (this is the ts of softirq) + ts = read current timestamp + + if (write == current position on the ring buffer [true] && + after < ts [true] && cmpxchg(write_stamp, after, ts) [true]) { + + delta = ts - after [Wrong!] + +The delta is to be between the hard irq event and the normal context +event, but the above logic made the delta between the softirq event and +the normal context event, where the hard irq event is between the two. This +will shift all the remaining event timestamps on the sub-buffer +incorrectly. + +The write_stamp is only valid if it matches the before_stamp. The cmpxchg +does nothing to help this. + +Instead, the following logic can be done to fix this: + + before = before_stamp + ts = read current timestamp + before_stamp = ts + + after = write_stamp + + if (write == current position on the ring buffer && + after == before && after < ts) { + + delta = ts - after + + } else { + delta = 0; + } + +The above will only use the write_stamp if it still matches before_stamp +and was tested to not have changed since C. + +As a bonus, with this logic we do not need any 64-bit cmpxchg() at all! + +This means the 32-bit rb_time_t workaround can finally be removed. But +that's for a later time. 
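+
+To make the skew concrete with made-up numbers (purely illustrative, not
+taken from a real trace): say the softirq event is stamped at 100, the hard
+irq event at 110, and the interrupted normal-context event reads ts = 120
+in the slow path, so at that point write_stamp = 100 and before_stamp = 110.
+
+   old logic:  after = 100, after < ts, the cmpxchg succeeds
+               delta = 120 - 100 = 20
+               the reader places the normal-context event at
+               110 + 20 = 130, ten units late, and later events on the
+               sub-buffer inherit the same +10 skew
+
+   new logic:  before = 110, after = 100, they do not match
+               delta = 0, so the event shares the hard irq event's
+               timestamp, and the updated before_stamp forces the next
+               event to use an absolute timestamp, resynchronizing the
+               stream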
+ +Link: https://lore.kernel.org/linux-trace-kernel/20231218175229.58ec3daf@gandalf.local.home/ +Link: https://lore.kernel.org/linux-trace-kernel/20231218230712.3a76b081@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Linus Torvalds +Fixes: dd93942570789 ("ring-buffer: Do not try to put back write_stamp") +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 77 ++++++++++++++------------------------------- + 1 file changed, 24 insertions(+), 53 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -691,44 +691,6 @@ rb_time_read_cmpxchg(local_t *l, unsigne + return ret == expect; + } + +-static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) +-{ +- unsigned long cnt, top, bottom; +- unsigned long cnt2, top2, bottom2; +- u64 val; +- +- /* Any interruptions in this function should cause a failure */ +- cnt = local_read(&t->cnt); +- +- /* The cmpxchg always fails if it interrupted an update */ +- if (!__rb_time_read(t, &val, &cnt2)) +- return false; +- +- if (val != expect) +- return false; +- +- if ((cnt & 3) != cnt2) +- return false; +- +- cnt2 = cnt + 1; +- +- rb_time_split(val, &top, &bottom); +- top = rb_time_val_cnt(top, cnt); +- bottom = rb_time_val_cnt(bottom, cnt); +- +- rb_time_split(set, &top2, &bottom2); +- top2 = rb_time_val_cnt(top2, cnt2); +- bottom2 = rb_time_val_cnt(bottom2, cnt2); +- +- if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2)) +- return false; +- if (!rb_time_read_cmpxchg(&t->top, top, top2)) +- return false; +- if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2)) +- return false; +- return true; +-} +- + #else /* 64 bits */ + + /* local64_t always succeeds */ +@@ -742,13 +704,6 @@ static void rb_time_set(rb_time_t *t, u6 + { + local64_set(&t->time, val); + } +- +-static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) +-{ +- u64 val; +- val = local64_cmpxchg(&t->time, expect, set); +- return val == expect; +-} + #endif + + /* +@@ -3568,20 +3523,36 @@ __rb_reserve_next(struct ring_buffer_per + } else { + u64 ts; + /* SLOW PATH - Interrupted between A and C */ +- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); +- /* Was interrupted before here, write_stamp must be valid */ ++ ++ /* Save the old before_stamp */ ++ a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); + RB_WARN_ON(cpu_buffer, !a_ok); ++ ++ /* ++ * Read a new timestamp and update the before_stamp to make ++ * the next event after this one force using an absolute ++ * timestamp. This is in case an interrupt were to come in ++ * between E and F. ++ */ + ts = rb_time_stamp(cpu_buffer->buffer); ++ rb_time_set(&cpu_buffer->before_stamp, ts); ++ ++ barrier(); ++ /*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); ++ /* Was interrupted before here, write_stamp must be valid */ ++ RB_WARN_ON(cpu_buffer, !a_ok); + barrier(); +- /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && +- info->after < ts && +- rb_time_cmpxchg(&cpu_buffer->write_stamp, +- info->after, ts)) { +- /* Nothing came after this event between C and E */ ++ /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && ++ info->after == info->before && info->after < ts) { ++ /* ++ * Nothing came after this event between C and F, it is ++ * safe to use info->after for the delta as it ++ * matched info->before and is still valid. 
++ */ + info->delta = ts - info->after; + } else { + /* +- * Interrupted between C and E: ++ * Interrupted between C and F: + * Lost the previous events time stamp. Just set the + * delta to zero, and this will be the same time as + * the event this event interrupted. And the events that diff --git a/queue-5.15/series b/queue-5.15/series index 6b161482809..7b5635a315d 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -88,3 +88,6 @@ ring-buffer-fix-wake-ups-when-buffer_percent-is-set-to-100.patch tracing-fix-blocked-reader-of-snapshot-buffer.patch ring-buffer-remove-useless-update-to-write_stamp-in-rb_try_to_discard.patch ksmbd-fix-slab-out-of-bounds-in-smb_strndup_from_utf16.patch +netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch +ring-buffer-fix-slowpath-of-interrupted-event.patch +dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch -- 2.47.3