]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Mar 2025 16:15:52 +0000 (17:15 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Mar 2025 16:15:52 +0000 (17:15 +0100)
added patches:
bpf-use-raw_spinlock_t-in-ringbuf.patch
hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch
zram-fix-null-pointer-in-comp_algorithm_show.patch

queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch [new file with mode: 0644]
queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch [new file with mode: 0644]

diff --git a/queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch b/queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch
new file mode 100644 (file)
index 0000000..dee1993
--- /dev/null
@@ -0,0 +1,110 @@
+From 8b62645b09f870d70c7910e7550289d444239a46 Mon Sep 17 00:00:00 2001
+From: Wander Lairson Costa <wander.lairson@gmail.com>
+Date: Fri, 20 Sep 2024 16:06:59 -0300
+Subject: bpf: Use raw_spinlock_t in ringbuf
+
+From: Wander Lairson Costa <wander.lairson@gmail.com>
+
+commit 8b62645b09f870d70c7910e7550289d444239a46 upstream.
+
+The function __bpf_ringbuf_reserve is invoked from a tracepoint, which
+disables preemption. Using spinlock_t in this context can lead to a
+"sleep in atomic" warning in the RT variant. This issue is illustrated
+in the example below:
+
+BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
+in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556208, name: test_progs
+preempt_count: 1, expected: 0
+RCU nest depth: 1, expected: 1
+INFO: lockdep is turned off.
+Preemption disabled at:
+[<ffffd33a5c88ea44>] migrate_enable+0xc0/0x39c
+CPU: 7 PID: 556208 Comm: test_progs Tainted: G
+Hardware name: Qualcomm SA8775P Ride (DT)
+Call trace:
+ dump_backtrace+0xac/0x130
+ show_stack+0x1c/0x30
+ dump_stack_lvl+0xac/0xe8
+ dump_stack+0x18/0x30
+ __might_resched+0x3bc/0x4fc
+ rt_spin_lock+0x8c/0x1a4
+ __bpf_ringbuf_reserve+0xc4/0x254
+ bpf_ringbuf_reserve_dynptr+0x5c/0xdc
+ bpf_prog_ac3d15160d62622a_test_read_write+0x104/0x238
+ trace_call_bpf+0x238/0x774
+ perf_call_bpf_enter.isra.0+0x104/0x194
+ perf_syscall_enter+0x2f8/0x510
+ trace_sys_enter+0x39c/0x564
+ syscall_trace_enter+0x220/0x3c0
+ do_el0_svc+0x138/0x1dc
+ el0_svc+0x54/0x130
+ el0t_64_sync_handler+0x134/0x150
+ el0t_64_sync+0x17c/0x180
+
+Switch the spinlock to raw_spinlock_t to avoid this error.
+
+Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
+Reported-by: Brian Grech <bgrech@redhat.com>
+Signed-off-by: Wander Lairson Costa <wander.lairson@gmail.com>
+Signed-off-by: Wander Lairson Costa <wander@redhat.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/r/20240920190700.617253-1-wander@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Jianqi Ren <jianqi.ren.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+---
+ kernel/bpf/ringbuf.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/kernel/bpf/ringbuf.c
++++ b/kernel/bpf/ringbuf.c
+@@ -29,7 +29,7 @@ struct bpf_ringbuf {
+       u64 mask;
+       struct page **pages;
+       int nr_pages;
+-      spinlock_t spinlock ____cacheline_aligned_in_smp;
++      raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
+       /* For user-space producer ring buffers, an atomic_t busy bit is used
+        * to synchronize access to the ring buffers in the kernel, rather than
+        * the spinlock that is used for kernel-producer ring buffers. This is
+@@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_a
+       if (!rb)
+               return NULL;
+-      spin_lock_init(&rb->spinlock);
++      raw_spin_lock_init(&rb->spinlock);
+       atomic_set(&rb->busy, 0);
+       init_waitqueue_head(&rb->waitq);
+       init_irq_work(&rb->work, bpf_ringbuf_notify);
+@@ -417,10 +417,10 @@ static void *__bpf_ringbuf_reserve(struc
+       cons_pos = smp_load_acquire(&rb->consumer_pos);
+       if (in_nmi()) {
+-              if (!spin_trylock_irqsave(&rb->spinlock, flags))
++              if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
+                       return NULL;
+       } else {
+-              spin_lock_irqsave(&rb->spinlock, flags);
++              raw_spin_lock_irqsave(&rb->spinlock, flags);
+       }
+       pend_pos = rb->pending_pos;
+@@ -446,7 +446,7 @@ static void *__bpf_ringbuf_reserve(struc
+        */
+       if (new_prod_pos - cons_pos > rb->mask ||
+           new_prod_pos - pend_pos > rb->mask) {
+-              spin_unlock_irqrestore(&rb->spinlock, flags);
++              raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+               return NULL;
+       }
+@@ -458,7 +458,7 @@ static void *__bpf_ringbuf_reserve(struc
+       /* pairs with consumer's smp_load_acquire() */
+       smp_store_release(&rb->producer_pos, new_prod_pos);
+-      spin_unlock_irqrestore(&rb->spinlock, flags);
++      raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+       return (void *)hdr + BPF_RINGBUF_HDR_SZ;
+ }
diff --git a/queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch b/queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch
new file mode 100644 (file)
index 0000000..89d2f69
--- /dev/null
@@ -0,0 +1,167 @@
+From ed4fb6d7ef68111bb539283561953e5c6e9a6e38 Mon Sep 17 00:00:00 2001
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+Date: Wed, 14 Aug 2024 14:10:32 +0200
+Subject: hrtimer: Use and report correct timerslack values for realtime tasks
+
+From: Felix Moessbauer <felix.moessbauer@siemens.com>
+
+commit ed4fb6d7ef68111bb539283561953e5c6e9a6e38 upstream.
+
+The timerslack_ns setting is used to specify how much the hardware
+timers should be delayed, to potentially dispatch multiple timers in a
+single interrupt. This is a performance optimization. Timers of
+realtime tasks (having a realtime scheduling policy) should not be
+delayed.
+
+This logic was inconsistently applied to the hrtimers, leading to delays
+of realtime tasks which used timed waits for events (e.g. condition
+variables). Due to the downstream override of the slack for rt tasks,
+the procfs reported incorrect (non-zero) timerslack_ns values.
+
+This is changed by setting the timer_slack_ns task attribute to 0 for
+all tasks with a rt policy. By that, downstream users do not need to
+specially handle rt tasks (w.r.t. the slack), and the procfs entry
+shows the correct value of "0". Setting non-zero slack values (either
+via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored,
+as stated in "man 2 PR_SET_TIMERSLACK":
+
+  Timer slack is not applied to threads that are scheduled under a
+  real-time scheduling policy (see sched_setscheduler(2)).
+
+The special handling of timerslack on rt tasks in downstream users
+is removed as well.
+
+Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/proc/base.c        |    9 +++++----
+ fs/select.c           |   11 ++++-------
+ kernel/sched/core.c   |    8 ++++++++
+ kernel/sys.c          |    2 ++
+ kernel/time/hrtimer.c |   18 +++---------------
+ 5 files changed, 22 insertions(+), 26 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -2633,10 +2633,11 @@ static ssize_t timerslack_ns_write(struc
+       }
+       task_lock(p);
+-      if (slack_ns == 0)
+-              p->timer_slack_ns = p->default_timer_slack_ns;
+-      else
+-              p->timer_slack_ns = slack_ns;
++      if (task_is_realtime(p))
++              slack_ns = 0;
++      else if (slack_ns == 0)
++              slack_ns = p->default_timer_slack_ns;
++      p->timer_slack_ns = slack_ns;
+       task_unlock(p);
+ out:
+--- a/fs/select.c
++++ b/fs/select.c
+@@ -77,19 +77,16 @@ u64 select_estimate_accuracy(struct time
+ {
+       u64 ret;
+       struct timespec64 now;
++      u64 slack = current->timer_slack_ns;
+-      /*
+-       * Realtime tasks get a slack of 0 for obvious reasons.
+-       */
+-
+-      if (rt_task(current))
++      if (slack == 0)
+               return 0;
+       ktime_get_ts64(&now);
+       now = timespec64_sub(*tv, now);
+       ret = __estimate_accuracy(&now);
+-      if (ret < current->timer_slack_ns)
+-              return current->timer_slack_ns;
++      if (ret < slack)
++              return slack;
+       return ret;
+ }
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7530,6 +7530,14 @@ static void __setscheduler_params(struct
+       else if (fair_policy(policy))
+               p->static_prio = NICE_TO_PRIO(attr->sched_nice);
++      /* rt-policy tasks do not have a timerslack */
++      if (task_is_realtime(p)) {
++              p->timer_slack_ns = 0;
++      } else if (p->timer_slack_ns == 0) {
++              /* when switching back to non-rt policy, restore timerslack */
++              p->timer_slack_ns = p->default_timer_slack_ns;
++      }
++
+       /*
+        * __sched_setscheduler() ensures attr->sched_priority == 0 when
+        * !rt_policy. Always setting this ensures that things like
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -2535,6 +2535,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
+                       error = current->timer_slack_ns;
+               break;
+       case PR_SET_TIMERSLACK:
++              if (task_is_realtime(current))
++                      break;
+               if (arg2 <= 0)
+                       current->timer_slack_ns =
+                                       current->default_timer_slack_ns;
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -2093,14 +2093,9 @@ long hrtimer_nanosleep(ktime_t rqtp, con
+       struct restart_block *restart;
+       struct hrtimer_sleeper t;
+       int ret = 0;
+-      u64 slack;
+-
+-      slack = current->timer_slack_ns;
+-      if (rt_task(current))
+-              slack = 0;
+       hrtimer_init_sleeper_on_stack(&t, clockid, mode);
+-      hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
++      hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
+       ret = do_nanosleep(&t, mode);
+       if (ret != -ERESTART_RESTARTBLOCK)
+               goto out;
+@@ -2281,7 +2276,7 @@ void __init hrtimers_init(void)
+ /**
+  * schedule_hrtimeout_range_clock - sleep until timeout
+  * @expires:  timeout value (ktime_t)
+- * @delta:    slack in expires timeout (ktime_t) for SCHED_OTHER tasks
++ * @delta:    slack in expires timeout (ktime_t)
+  * @mode:     timer mode
+  * @clock_id: timer clock to be used
+  */
+@@ -2308,13 +2303,6 @@ schedule_hrtimeout_range_clock(ktime_t *
+               return -EINTR;
+       }
+-      /*
+-       * Override any slack passed by the user if under
+-       * rt contraints.
+-       */
+-      if (rt_task(current))
+-              delta = 0;
+-
+       hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
+       hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+       hrtimer_sleeper_start_expires(&t, mode);
+@@ -2334,7 +2322,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_ran
+ /**
+  * schedule_hrtimeout_range - sleep until timeout
+  * @expires:  timeout value (ktime_t)
+- * @delta:    slack in expires timeout (ktime_t) for SCHED_OTHER tasks
++ * @delta:    slack in expires timeout (ktime_t)
+  * @mode:     timer mode
+  *
+  * Make the current task sleep until the given expiry time has
index a47ea14d1139a877a4f2cb7669373c833579ad6e..8b9c56808abbb3cf49e3763fab5977264b66e358 100644 (file)
@@ -1,2 +1,5 @@
 clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch
 sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch
+zram-fix-null-pointer-in-comp_algorithm_show.patch
+hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch
+bpf-use-raw_spinlock_t-in-ringbuf.patch
diff --git a/queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch b/queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch
new file mode 100644 (file)
index 0000000..5ec5784
--- /dev/null
@@ -0,0 +1,89 @@
+From f364cdeb38938f9d03061682b8ff3779dd1730e5 Mon Sep 17 00:00:00 2001
+From: Liu Shixin <liushixin2@huawei.com>
+Date: Fri, 8 Nov 2024 18:01:47 +0800
+Subject: zram: fix NULL pointer in comp_algorithm_show()
+
+From: Liu Shixin <liushixin2@huawei.com>
+
+commit f364cdeb38938f9d03061682b8ff3779dd1730e5 upstream.
+
+LTP reported a NULL pointer dereference as followed:
+
+ CPU: 7 UID: 0 PID: 5995 Comm: cat Kdump: loaded Not tainted 6.12.0-rc6+ #3
+ Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+ pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : __pi_strcmp+0x24/0x140
+ lr : zcomp_available_show+0x60/0x100 [zram]
+ sp : ffff800088b93b90
+ x29: ffff800088b93b90 x28: 0000000000000001 x27: 0000000000400cc0
+ x26: 0000000000000ffe x25: ffff80007b3e2388 x24: 0000000000000000
+ x23: ffff80007b3e2390 x22: ffff0004041a9000 x21: ffff80007b3e2900
+ x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000000
+ x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
+ x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
+ x11: 0000000000000000 x10: ffff80007b3e2900 x9 : ffff80007b3cb280
+ x8 : 0101010101010101 x7 : 0000000000000000 x6 : 0000000000000000
+ x5 : 0000000000000040 x4 : 0000000000000000 x3 : 00656c722d6f7a6c
+ x2 : 0000000000000000 x1 : ffff80007b3e2900 x0 : 0000000000000000
+ Call trace:
+  __pi_strcmp+0x24/0x140
+  comp_algorithm_show+0x40/0x70 [zram]
+  dev_attr_show+0x28/0x80
+  sysfs_kf_seq_show+0x90/0x140
+  kernfs_seq_show+0x34/0x48
+  seq_read_iter+0x1d4/0x4e8
+  kernfs_fop_read_iter+0x40/0x58
+  new_sync_read+0x9c/0x168
+  vfs_read+0x1a8/0x1f8
+  ksys_read+0x74/0x108
+  __arm64_sys_read+0x24/0x38
+  invoke_syscall+0x50/0x120
+  el0_svc_common.constprop.0+0xc8/0xf0
+  do_el0_svc+0x24/0x38
+  el0_svc+0x38/0x138
+  el0t_64_sync_handler+0xc0/0xc8
+  el0t_64_sync+0x188/0x190
+
+The zram->comp_algs[ZRAM_PRIMARY_COMP] can be NULL in zram_add() if
+comp_algorithm_set() has not been called.  User can access the zram device
+by sysfs after device_add_disk(), so there is a time window to trigger the
+NULL pointer dereference.  Move it ahead device_add_disk() to make sure
+when user can access the zram device, it is ready.  comp_algorithm_set()
+is protected by zram->init_lock in other places and no such problem.
+
+Link: https://lkml.kernel.org/r/20241108100147.3776123-1-liushixin2@huawei.com
+Fixes: 7ac07a26dea7 ("zram: preparation for multi-zcomp support")
+Signed-off-by: Liu Shixin <liushixin2@huawei.com>
+Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[This fix does not backport zram_comp_params_reset which was introduced after
+ v6.6, in commit f2bac7ad187d ("zram: introduce zcomp_params structure")]
+Signed-off-by: Jianqi Ren <jianqi.ren.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/zram/zram_drv.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -2247,6 +2247,8 @@ static int zram_add(void)
+       zram->disk->private_data = zram;
+       snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
++      comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
++
+       /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
+       set_capacity(zram->disk, 0);
+       /* zram devices sort of resembles non-rotational disks */
+@@ -2281,8 +2283,6 @@ static int zram_add(void)
+       if (ret)
+               goto out_cleanup_disk;
+-      comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
+-
+       zram_debugfs_register(zram);
+       pr_info("Added device: %s\n", zram->disk->disk_name);
+       return device_id;