git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Jan 2024 10:28:53 +0000 (11:28 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Jan 2024 10:28:53 +0000 (11:28 +0100)
added patches:
dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch
netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch
ring-buffer-fix-slowpath-of-interrupted-event.patch

queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch [new file with mode: 0644]
queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch [new file with mode: 0644]
queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch b/queue-5.15/dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch
new file mode 100644 (file)
index 0000000..cc9a00f
--- /dev/null
@@ -0,0 +1,66 @@
+From b86f4b790c998afdbc88fe1aa55cfe89c4068726 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 5 Dec 2023 16:39:16 +0100
+Subject: dm-integrity: don't modify bio's immutable bio_vec in integrity_metadata()
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit b86f4b790c998afdbc88fe1aa55cfe89c4068726 upstream.
+
+__bio_for_each_segment assumes that the first struct bio_vec argument
+doesn't change - it calls "bio_advance_iter_single((bio), &(iter),
+(bvl).bv_len)" to advance the iterator. Unfortunately, the dm-integrity
+code changes the bio_vec with "bv.bv_len -= pos". When this code path
+is taken, the iterator would be out of sync and dm-integrity would
+report errors. This happens if the machine is out of memory and
+"kmalloc" fails.
+
+Fix this bug by making a copy of "bv" and changing the copy instead.
+
+Fixes: 7eada909bfd7 ("dm: add integrity target")
+Cc: stable@vger.kernel.org     # v4.12+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -1762,11 +1762,12 @@ static void integrity_metadata(struct wo
+               sectors_to_process = dio->range.n_sectors;
+               __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
++                      struct bio_vec bv_copy = bv;
+                       unsigned pos;
+                       char *mem, *checksums_ptr;
+ again:
+-                      mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
++                      mem = (char *)kmap_atomic(bv_copy.bv_page) + bv_copy.bv_offset;
+                       pos = 0;
+                       checksums_ptr = checksums;
+                       do {
+@@ -1775,7 +1776,7 @@ again:
+                               sectors_to_process -= ic->sectors_per_block;
+                               pos += ic->sectors_per_block << SECTOR_SHIFT;
+                               sector += ic->sectors_per_block;
+-                      } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
++                      } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack);
+                       kunmap_atomic(mem);
+                       r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+@@ -1796,9 +1797,9 @@ again:
+                       if (!sectors_to_process)
+                               break;
+-                      if (unlikely(pos < bv.bv_len)) {
+-                              bv.bv_offset += pos;
+-                              bv.bv_len -= pos;
++                      if (unlikely(pos < bv_copy.bv_len)) {
++                              bv_copy.bv_offset += pos;
++                              bv_copy.bv_len -= pos;
+                               goto again;
+                       }
+               }
diff --git a/queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch b/queue-5.15/netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch
new file mode 100644 (file)
index 0000000..6fa10ad
--- /dev/null
@@ -0,0 +1,33 @@
+From 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 19 Dec 2023 19:44:49 +0100
+Subject: netfilter: nf_tables: skip set commit for deleted/destroyed sets
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a upstream.
+
+NFT_MSG_DELSET deactivates all elements in the set, so skip
+set->ops->commit() to avoid the unnecessary clone (for the pipapo case)
+as well as the sync GC cycle, which could deactivate expired elements
+in such a set again.
+
+Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
+Reported-by: Kevin Rich <kevinrich1337@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9256,7 +9256,7 @@ static void nft_set_commit_update(struct
+       list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+               list_del_init(&set->pending_update);
+-              if (!set->ops->commit)
++              if (!set->ops->commit || set->dead)
+                       continue;
+               set->ops->commit(set);
diff --git a/queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch b/queue-5.15/ring-buffer-fix-slowpath-of-interrupted-event.patch
new file mode 100644 (file)
index 0000000..0928f98
--- /dev/null
@@ -0,0 +1,272 @@
+From b803d7c664d55705831729d2f2e29c874bcd62ea Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Mon, 18 Dec 2023 23:07:12 -0500
+Subject: ring-buffer: Fix slowpath of interrupted event
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit b803d7c664d55705831729d2f2e29c874bcd62ea upstream.
+
+To synchronize the timestamps with the ring buffer reservation, there are
+two timestamps that are saved in the buffer meta data.
+
+1. before_stamp
+2. write_stamp
+
+When the two are equal, the write_stamp is considered valid, as in, it may
+be used to calculate the delta of the next event as the write_stamp is the
+timestamp of the previous reserved event on the buffer.
+
+This is done by the following:
+
+ /*A*/ w = current position on the ring buffer
+       before = before_stamp
+       after = write_stamp
+       ts = read current timestamp
+
+       if (before != after) {
+               write_stamp is not valid, force adding an absolute
+               timestamp.
+       }
+
+ /*B*/ before_stamp = ts
+
+ /*C*/ write = local_add_return(event length, position on ring buffer)
+
+       if (w == write - event length) {
+               /* Nothing interrupted between A and C */
+ /*E*/         write_stamp = ts;
+               delta = ts - after
+               /*
+                * If nothing interrupted again,
+                * before_stamp == write_stamp and write_stamp
+                * can be used to calculate the delta for
+                * events that come in after this one.
+                */
+       } else {
+
+               /*
+                * The slow path!
+                * Was interrupted between A and C.
+                */
+
+This is where the bug is. We currently have:
+
+               after = write_stamp
+               ts = read current timestamp
+
+ /*F*/         if (write == current position on the ring buffer &&
+                   after < ts && cmpxchg(write_stamp, after, ts)) {
+
+                       delta = ts - after;
+
+               } else {
+                       delta = 0;
+               }
+
+The assumption is that if the current position on the ring buffer hasn't
+moved between C and F, then it also was not interrupted, and that the last
+event written has a timestamp that matches the write_stamp. That is, the
+write_stamp is valid.
+
+But this may not be the case:
+
+If a task context event was interrupted by softirq between B and C.
+
+And the softirq wrote an event that got interrupted by a hard irq between
+C and E.
+
+and the hard irq wrote an event (does not need to be interrupted)
+
+We have:
+
+ /*B*/ before_stamp = ts of normal context
+
+   ---> interrupted by softirq
+
+       /*B*/ before_stamp = ts of softirq context
+
+         ---> interrupted by hardirq
+
+               /*B*/ before_stamp = ts of hard irq context
+               /*E*/ write_stamp = ts of hard irq context
+
+               /* matches and write_stamp valid */
+         <----
+
+       /*E*/ write_stamp = ts of softirq context
+
+       /* No longer matches before_stamp, write_stamp is not valid! */
+
+   <---
+
+ w != write - length, go to slow path
+
+// Right now the order of events in the ring buffer is:
+//
+// |-- softirq event --|-- hard irq event --|-- normal context event --|
+//
+
+ after = write_stamp (this is the ts of softirq)
+ ts = read current timestamp
+
+ if (write == current position on the ring buffer [true] &&
+     after < ts [true] && cmpxchg(write_stamp, after, ts) [true]) {
+
+       delta = ts - after  [Wrong!]
+
+The delta is to be between the hard irq event and the normal context
+event, but the above logic made the delta between the softirq event and
+the normal context event, where the hard irq event is between the two. This
+will shift all the remaining event timestamps on the sub-buffer
+incorrectly.
+
+The write_stamp is only valid if it matches the before_stamp. The cmpxchg
+does nothing to help this.
+
+Instead, the following logic can be done to fix this:
+
+       before = before_stamp
+       ts = read current timestamp
+       before_stamp = ts
+
+       after = write_stamp
+
+       if (write == current position on the ring buffer &&
+           after == before && after < ts) {
+
+               delta = ts - after
+
+       } else {
+               delta = 0;
+       }
+
+The above will only use the write_stamp if it still matches before_stamp
+and was tested to not have changed since C.
+
+As a bonus, with this logic we do not need any 64-bit cmpxchg() at all!
+
+This means the 32-bit rb_time_t workaround can finally be removed. But
+that's for a later time.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231218175229.58ec3daf@gandalf.local.home/
+Link: https://lore.kernel.org/linux-trace-kernel/20231218230712.3a76b081@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: dd93942570789 ("ring-buffer: Do not try to put back write_stamp")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c |   77 ++++++++++++++-------------------------------
+ 1 file changed, 24 insertions(+), 53 deletions(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -691,44 +691,6 @@ rb_time_read_cmpxchg(local_t *l, unsigne
+       return ret == expect;
+ }
+-static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
+-{
+-      unsigned long cnt, top, bottom;
+-      unsigned long cnt2, top2, bottom2;
+-      u64 val;
+-
+-      /* Any interruptions in this function should cause a failure */
+-      cnt = local_read(&t->cnt);
+-
+-      /* The cmpxchg always fails if it interrupted an update */
+-       if (!__rb_time_read(t, &val, &cnt2))
+-               return false;
+-
+-       if (val != expect)
+-               return false;
+-
+-       if ((cnt & 3) != cnt2)
+-               return false;
+-
+-       cnt2 = cnt + 1;
+-
+-       rb_time_split(val, &top, &bottom);
+-       top = rb_time_val_cnt(top, cnt);
+-       bottom = rb_time_val_cnt(bottom, cnt);
+-
+-       rb_time_split(set, &top2, &bottom2);
+-       top2 = rb_time_val_cnt(top2, cnt2);
+-       bottom2 = rb_time_val_cnt(bottom2, cnt2);
+-
+-      if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
+-              return false;
+-      if (!rb_time_read_cmpxchg(&t->top, top, top2))
+-              return false;
+-      if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
+-              return false;
+-      return true;
+-}
+-
+ #else /* 64 bits */
+ /* local64_t always succeeds */
+@@ -742,13 +704,6 @@ static void rb_time_set(rb_time_t *t, u6
+ {
+       local64_set(&t->time, val);
+ }
+-
+-static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
+-{
+-      u64 val;
+-      val = local64_cmpxchg(&t->time, expect, set);
+-      return val == expect;
+-}
+ #endif
+ /*
+@@ -3568,20 +3523,36 @@ __rb_reserve_next(struct ring_buffer_per
+       } else {
+               u64 ts;
+               /* SLOW PATH - Interrupted between A and C */
+-              a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
+-              /* Was interrupted before here, write_stamp must be valid */
++
++              /* Save the old before_stamp */
++              a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
+               RB_WARN_ON(cpu_buffer, !a_ok);
++
++              /*
++               * Read a new timestamp and update the before_stamp to make
++               * the next event after this one force using an absolute
++               * timestamp. This is in case an interrupt were to come in
++               * between E and F.
++               */
+               ts = rb_time_stamp(cpu_buffer->buffer);
++              rb_time_set(&cpu_buffer->before_stamp, ts);
++
++              barrier();
++ /*E*/                a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
++              /* Was interrupted before here, write_stamp must be valid */
++              RB_WARN_ON(cpu_buffer, !a_ok);
+               barrier();
+- /*E*/                if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
+-                  info->after < ts &&
+-                  rb_time_cmpxchg(&cpu_buffer->write_stamp,
+-                                  info->after, ts)) {
+-                      /* Nothing came after this event between C and E */
++ /*F*/                if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
++                  info->after == info->before && info->after < ts) {
++                      /*
++                       * Nothing came after this event between C and F, it is
++                       * safe to use info->after for the delta as it
++                       * matched info->before and is still valid.
++                       */
+                       info->delta = ts - info->after;
+               } else {
+                       /*
+-                       * Interrupted between C and E:
++                       * Interrupted between C and F:
+                        * Lost the previous events time stamp. Just set the
+                        * delta to zero, and this will be the same time as
+                        * the event this event interrupted. And the events that
index 6b161482809009442f9e0f842aba74504ce9c07d..7b5635a315dde95652185284a378c42dc61858e0 100644 (file)
@@ -88,3 +88,6 @@ ring-buffer-fix-wake-ups-when-buffer_percent-is-set-to-100.patch
 tracing-fix-blocked-reader-of-snapshot-buffer.patch
 ring-buffer-remove-useless-update-to-write_stamp-in-rb_try_to_discard.patch
 ksmbd-fix-slab-out-of-bounds-in-smb_strndup_from_utf16.patch
+netfilter-nf_tables-skip-set-commit-for-deleted-destroyed-sets.patch
+ring-buffer-fix-slowpath-of-interrupted-event.patch
+dm-integrity-don-t-modify-bio-s-immutable-bio_vec-in-integrity_metadata.patch