From: Greg Kroah-Hartman Date: Thu, 13 Mar 2025 16:15:52 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v6.6.84~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=09d7fdc521c9c8a4bad2909ace7317f67616de1b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: bpf-use-raw_spinlock_t-in-ringbuf.patch hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch zram-fix-null-pointer-in-comp_algorithm_show.patch --- diff --git a/queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch b/queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch new file mode 100644 index 0000000000..dee1993d80 --- /dev/null +++ b/queue-6.6/bpf-use-raw_spinlock_t-in-ringbuf.patch @@ -0,0 +1,110 @@ +From 8b62645b09f870d70c7910e7550289d444239a46 Mon Sep 17 00:00:00 2001 +From: Wander Lairson Costa +Date: Fri, 20 Sep 2024 16:06:59 -0300 +Subject: bpf: Use raw_spinlock_t in ringbuf + +From: Wander Lairson Costa + +commit 8b62645b09f870d70c7910e7550289d444239a46 upstream. + +The function __bpf_ringbuf_reserve is invoked from a tracepoint, which +disables preemption. Using spinlock_t in this context can lead to a +"sleep in atomic" warning in the RT variant. This issue is illustrated +in the example below: + +BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 +in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556208, name: test_progs +preempt_count: 1, expected: 0 +RCU nest depth: 1, expected: 1 +INFO: lockdep is turned off. 
+Preemption disabled at: +[] migrate_enable+0xc0/0x39c +CPU: 7 PID: 556208 Comm: test_progs Tainted: G +Hardware name: Qualcomm SA8775P Ride (DT) +Call trace: + dump_backtrace+0xac/0x130 + show_stack+0x1c/0x30 + dump_stack_lvl+0xac/0xe8 + dump_stack+0x18/0x30 + __might_resched+0x3bc/0x4fc + rt_spin_lock+0x8c/0x1a4 + __bpf_ringbuf_reserve+0xc4/0x254 + bpf_ringbuf_reserve_dynptr+0x5c/0xdc + bpf_prog_ac3d15160d62622a_test_read_write+0x104/0x238 + trace_call_bpf+0x238/0x774 + perf_call_bpf_enter.isra.0+0x104/0x194 + perf_syscall_enter+0x2f8/0x510 + trace_sys_enter+0x39c/0x564 + syscall_trace_enter+0x220/0x3c0 + do_el0_svc+0x138/0x1dc + el0_svc+0x54/0x130 + el0t_64_sync_handler+0x134/0x150 + el0t_64_sync+0x17c/0x180 + +Switch the spinlock to raw_spinlock_t to avoid this error. + +Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") +Reported-by: Brian Grech +Signed-off-by: Wander Lairson Costa +Signed-off-by: Wander Lairson Costa +Signed-off-by: Daniel Borkmann +Acked-by: Daniel Borkmann +Link: https://lore.kernel.org/r/20240920190700.617253-1-wander@redhat.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Jianqi Ren +Signed-off-by: He Zhe +--- + kernel/bpf/ringbuf.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/kernel/bpf/ringbuf.c ++++ b/kernel/bpf/ringbuf.c +@@ -29,7 +29,7 @@ struct bpf_ringbuf { + u64 mask; + struct page **pages; + int nr_pages; +- spinlock_t spinlock ____cacheline_aligned_in_smp; ++ raw_spinlock_t spinlock ____cacheline_aligned_in_smp; + /* For user-space producer ring buffers, an atomic_t busy bit is used + * to synchronize access to the ring buffers in the kernel, rather than + * the spinlock that is used for kernel-producer ring buffers. 
This is +@@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_a + if (!rb) + return NULL; + +- spin_lock_init(&rb->spinlock); ++ raw_spin_lock_init(&rb->spinlock); + atomic_set(&rb->busy, 0); + init_waitqueue_head(&rb->waitq); + init_irq_work(&rb->work, bpf_ringbuf_notify); +@@ -417,10 +417,10 @@ static void *__bpf_ringbuf_reserve(struc + cons_pos = smp_load_acquire(&rb->consumer_pos); + + if (in_nmi()) { +- if (!spin_trylock_irqsave(&rb->spinlock, flags)) ++ if (!raw_spin_trylock_irqsave(&rb->spinlock, flags)) + return NULL; + } else { +- spin_lock_irqsave(&rb->spinlock, flags); ++ raw_spin_lock_irqsave(&rb->spinlock, flags); + } + + pend_pos = rb->pending_pos; +@@ -446,7 +446,7 @@ static void *__bpf_ringbuf_reserve(struc + */ + if (new_prod_pos - cons_pos > rb->mask || + new_prod_pos - pend_pos > rb->mask) { +- spin_unlock_irqrestore(&rb->spinlock, flags); ++ raw_spin_unlock_irqrestore(&rb->spinlock, flags); + return NULL; + } + +@@ -458,7 +458,7 @@ static void *__bpf_ringbuf_reserve(struc + /* pairs with consumer's smp_load_acquire() */ + smp_store_release(&rb->producer_pos, new_prod_pos); + +- spin_unlock_irqrestore(&rb->spinlock, flags); ++ raw_spin_unlock_irqrestore(&rb->spinlock, flags); + + return (void *)hdr + BPF_RINGBUF_HDR_SZ; + } diff --git a/queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch b/queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch new file mode 100644 index 0000000000..89d2f695cb --- /dev/null +++ b/queue-6.6/hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch @@ -0,0 +1,167 @@ +From ed4fb6d7ef68111bb539283561953e5c6e9a6e38 Mon Sep 17 00:00:00 2001 +From: Felix Moessbauer +Date: Wed, 14 Aug 2024 14:10:32 +0200 +Subject: hrtimer: Use and report correct timerslack values for realtime tasks + +From: Felix Moessbauer + +commit ed4fb6d7ef68111bb539283561953e5c6e9a6e38 upstream. 
+ +The timerslack_ns setting is used to specify how much the hardware +timers should be delayed, to potentially dispatch multiple timers in a +single interrupt. This is a performance optimization. Timers of +realtime tasks (having a realtime scheduling policy) should not be +delayed. + +This logic was inconsistently applied to the hrtimers, leading to delays +of realtime tasks which used timed waits for events (e.g. condition +variables). Due to the downstream override of the slack for rt tasks, +the procfs reported incorrect (non-zero) timerslack_ns values. + +This is changed by setting the timer_slack_ns task attribute to 0 for +all tasks with a rt policy. By that, downstream users do not need to +specially handle rt tasks (w.r.t. the slack), and the procfs entry +shows the correct value of "0". Setting non-zero slack values (either +via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored, +as stated in "man 2 PR_SET_TIMERSLACK": + + Timer slack is not applied to threads that are scheduled under a + real-time scheduling policy (see sched_setscheduler(2)). + +The special handling of timerslack on rt tasks in downstream users +is removed as well. 
+ +Signed-off-by: Felix Moessbauer +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/base.c | 9 +++++---- + fs/select.c | 11 ++++------- + kernel/sched/core.c | 8 ++++++++ + kernel/sys.c | 2 ++ + kernel/time/hrtimer.c | 18 +++--------------- + 5 files changed, 22 insertions(+), 26 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -2633,10 +2633,11 @@ static ssize_t timerslack_ns_write(struc + } + + task_lock(p); +- if (slack_ns == 0) +- p->timer_slack_ns = p->default_timer_slack_ns; +- else +- p->timer_slack_ns = slack_ns; ++ if (task_is_realtime(p)) ++ slack_ns = 0; ++ else if (slack_ns == 0) ++ slack_ns = p->default_timer_slack_ns; ++ p->timer_slack_ns = slack_ns; + task_unlock(p); + + out: +--- a/fs/select.c ++++ b/fs/select.c +@@ -77,19 +77,16 @@ u64 select_estimate_accuracy(struct time + { + u64 ret; + struct timespec64 now; ++ u64 slack = current->timer_slack_ns; + +- /* +- * Realtime tasks get a slack of 0 for obvious reasons. +- */ +- +- if (rt_task(current)) ++ if (slack == 0) + return 0; + + ktime_get_ts64(&now); + now = timespec64_sub(*tv, now); + ret = __estimate_accuracy(&now); +- if (ret < current->timer_slack_ns) +- return current->timer_slack_ns; ++ if (ret < slack) ++ return slack; + return ret; + } + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -7530,6 +7530,14 @@ static void __setscheduler_params(struct + else if (fair_policy(policy)) + p->static_prio = NICE_TO_PRIO(attr->sched_nice); + ++ /* rt-policy tasks do not have a timerslack */ ++ if (task_is_realtime(p)) { ++ p->timer_slack_ns = 0; ++ } else if (p->timer_slack_ns == 0) { ++ /* when switching back to non-rt policy, restore timerslack */ ++ p->timer_slack_ns = p->default_timer_slack_ns; ++ } ++ + /* + * __sched_setscheduler() ensures attr->sched_priority == 0 when + * !rt_policy. 
Always setting this ensures that things like +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -2535,6 +2535,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsi + error = current->timer_slack_ns; + break; + case PR_SET_TIMERSLACK: ++ if (task_is_realtime(current)) ++ break; + if (arg2 <= 0) + current->timer_slack_ns = + current->default_timer_slack_ns; +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -2093,14 +2093,9 @@ long hrtimer_nanosleep(ktime_t rqtp, con + struct restart_block *restart; + struct hrtimer_sleeper t; + int ret = 0; +- u64 slack; +- +- slack = current->timer_slack_ns; +- if (rt_task(current)) +- slack = 0; + + hrtimer_init_sleeper_on_stack(&t, clockid, mode); +- hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); ++ hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); + ret = do_nanosleep(&t, mode); + if (ret != -ERESTART_RESTARTBLOCK) + goto out; +@@ -2281,7 +2276,7 @@ void __init hrtimers_init(void) + /** + * schedule_hrtimeout_range_clock - sleep until timeout + * @expires: timeout value (ktime_t) +- * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks ++ * @delta: slack in expires timeout (ktime_t) + * @mode: timer mode + * @clock_id: timer clock to be used + */ +@@ -2308,13 +2303,6 @@ schedule_hrtimeout_range_clock(ktime_t * + return -EINTR; + } + +- /* +- * Override any slack passed by the user if under +- * rt contraints. 
+- */ +- if (rt_task(current)) +- delta = 0; +- + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); + hrtimer_sleeper_start_expires(&t, mode); +@@ -2334,7 +2322,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_ran + /** + * schedule_hrtimeout_range - sleep until timeout + * @expires: timeout value (ktime_t) +- * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks ++ * @delta: slack in expires timeout (ktime_t) + * @mode: timer mode + * + * Make the current task sleep until the given expiry time has diff --git a/queue-6.6/series b/queue-6.6/series index a47ea14d11..8b9c56808a 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -1,2 +1,5 @@ clockevents-drivers-i8253-fix-stop-sequence-for-timer-0.patch sched-isolation-prevent-boot-crash-when-the-boot-cpu-is-nohz_full.patch +zram-fix-null-pointer-in-comp_algorithm_show.patch +hrtimer-use-and-report-correct-timerslack-values-for-realtime-tasks.patch +bpf-use-raw_spinlock_t-in-ringbuf.patch diff --git a/queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch b/queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch new file mode 100644 index 0000000000..5ec5784acf --- /dev/null +++ b/queue-6.6/zram-fix-null-pointer-in-comp_algorithm_show.patch @@ -0,0 +1,89 @@ +From f364cdeb38938f9d03061682b8ff3779dd1730e5 Mon Sep 17 00:00:00 2001 +From: Liu Shixin +Date: Fri, 8 Nov 2024 18:01:47 +0800 +Subject: zram: fix NULL pointer in comp_algorithm_show() + +From: Liu Shixin + +commit f364cdeb38938f9d03061682b8ff3779dd1730e5 upstream. 
+ +LTP reported a NULL pointer dereference as follows: + + CPU: 7 UID: 0 PID: 5995 Comm: cat Kdump: loaded Not tainted 6.12.0-rc6+ #3 + Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 + pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : __pi_strcmp+0x24/0x140 + lr : zcomp_available_show+0x60/0x100 [zram] + sp : ffff800088b93b90 + x29: ffff800088b93b90 x28: 0000000000000001 x27: 0000000000400cc0 + x26: 0000000000000ffe x25: ffff80007b3e2388 x24: 0000000000000000 + x23: ffff80007b3e2390 x22: ffff0004041a9000 x21: ffff80007b3e2900 + x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000000 + x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 + x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 + x11: 0000000000000000 x10: ffff80007b3e2900 x9 : ffff80007b3cb280 + x8 : 0101010101010101 x7 : 0000000000000000 x6 : 0000000000000000 + x5 : 0000000000000040 x4 : 0000000000000000 x3 : 00656c722d6f7a6c + x2 : 0000000000000000 x1 : ffff80007b3e2900 x0 : 0000000000000000 + Call trace: + __pi_strcmp+0x24/0x140 + comp_algorithm_show+0x40/0x70 [zram] + dev_attr_show+0x28/0x80 + sysfs_kf_seq_show+0x90/0x140 + kernfs_seq_show+0x34/0x48 + seq_read_iter+0x1d4/0x4e8 + kernfs_fop_read_iter+0x40/0x58 + new_sync_read+0x9c/0x168 + vfs_read+0x1a8/0x1f8 + ksys_read+0x74/0x108 + __arm64_sys_read+0x24/0x38 + invoke_syscall+0x50/0x120 + el0_svc_common.constprop.0+0xc8/0xf0 + do_el0_svc+0x24/0x38 + el0_svc+0x38/0x138 + el0t_64_sync_handler+0xc0/0xc8 + el0t_64_sync+0x188/0x190 + +The zram->comp_algs[ZRAM_PRIMARY_COMP] can be NULL in zram_add() if +comp_algorithm_set() has not been called. A user can access the zram device +by sysfs after device_add_disk(), so there is a time window to trigger the +NULL pointer dereference. Move it ahead of device_add_disk() to make sure +when user can access the zram device, it is ready. comp_algorithm_set() +is protected by zram->init_lock in other places and has no such problem there. 
+ +Link: https://lkml.kernel.org/r/20241108100147.3776123-1-liushixin2@huawei.com +Fixes: 7ac07a26dea7 ("zram: preparation for multi-zcomp support") +Signed-off-by: Liu Shixin +Reviewed-by: Sergey Senozhatsky +Cc: Jens Axboe +Cc: Minchan Kim +Signed-off-by: Andrew Morton +[This fix does not backport zram_comp_params_reset which was introduced after + v6.6, in commit f2bac7ad187d ("zram: introduce zcomp_params structure")] +Signed-off-by: Jianqi Ren +Signed-off-by: He Zhe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/zram/zram_drv.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -2247,6 +2247,8 @@ static int zram_add(void) + zram->disk->private_data = zram; + snprintf(zram->disk->disk_name, 16, "zram%d", device_id); + ++ comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); ++ + /* Actual capacity set using sysfs (/sys/block/zram/disksize */ + set_capacity(zram->disk, 0); + /* zram devices sort of resembles non-rotational disks */ +@@ -2281,8 +2283,6 @@ static int zram_add(void) + if (ret) + goto out_cleanup_disk; + +- comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); +- + zram_debugfs_register(zram); + pr_info("Added device: %s\n", zram->disk->disk_name); + return device_id;