Fixes for 5.4

author Sasha Levin <sashal@kernel.org>

Mon, 23 Jun 2025 12:42:57 +0000 (08:42 -0400)

committer Sasha Levin <sashal@kernel.org>

Mon, 23 Jun 2025 12:43:54 +0000 (08:43 -0400)
author Sasha Levin <sashal@kernel.org>
Mon, 23 Jun 2025 12:42:57 +0000 (08:42 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 23 Jun 2025 12:43:54 +0000 (08:43 -0400)
diff --git a/queue-5.4/arm64-ptrace-fix-stack-out-of-bounds-read-in-regs_ge.patch b/queue-5.4/arm64-ptrace-fix-stack-out-of-bounds-read-in-regs_ge.patch

new file mode 100644 (file)

index 0000000..6acd227
--- /dev/null
+++ b/queue-5.4/arm64-ptrace-fix-stack-out-of-bounds-read-in-regs_ge.patch
@@ -0,0 +1,107 @@
+From 676caba86c8acd2fa2f65468b343481a7f360222 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jun 2025 00:55:33 +0000
+Subject: arm64/ptrace: Fix stack-out-of-bounds read in
+ regs_get_kernel_stack_nth()
+
+From: Tengda Wu <wutengda@huaweicloud.com>
+
+[ Upstream commit 39dfc971e42d886e7df01371cd1bef505076d84c ]
+
+KASAN reports a stack-out-of-bounds read in regs_get_kernel_stack_nth().
+
+Call Trace:
+[   97.283505] BUG: KASAN: stack-out-of-bounds in regs_get_kernel_stack_nth+0xa8/0xc8
+[   97.284677] Read of size 8 at addr ffff800089277c10 by task 1.sh/2550
+[   97.285732]
+[   97.286067] CPU: 7 PID: 2550 Comm: 1.sh Not tainted 6.6.0+ #11
+[   97.287032] Hardware name: linux,dummy-virt (DT)
+[   97.287815] Call trace:
+[   97.288279]  dump_backtrace+0xa0/0x128
+[   97.288946]  show_stack+0x20/0x38
+[   97.289551]  dump_stack_lvl+0x78/0xc8
+[   97.290203]  print_address_description.constprop.0+0x84/0x3c8
+[   97.291159]  print_report+0xb0/0x280
+[   97.291792]  kasan_report+0x84/0xd0
+[   97.292421]  __asan_load8+0x9c/0xc0
+[   97.293042]  regs_get_kernel_stack_nth+0xa8/0xc8
+[   97.293835]  process_fetch_insn+0x770/0xa30
+[   97.294562]  kprobe_trace_func+0x254/0x3b0
+[   97.295271]  kprobe_dispatcher+0x98/0xe0
+[   97.295955]  kprobe_breakpoint_handler+0x1b0/0x210
+[   97.296774]  call_break_hook+0xc4/0x100
+[   97.297451]  brk_handler+0x24/0x78
+[   97.298073]  do_debug_exception+0xac/0x178
+[   97.298785]  el1_dbg+0x70/0x90
+[   97.299344]  el1h_64_sync_handler+0xcc/0xe8
+[   97.300066]  el1h_64_sync+0x78/0x80
+[   97.300699]  kernel_clone+0x0/0x500
+[   97.301331]  __arm64_sys_clone+0x70/0x90
+[   97.302084]  invoke_syscall+0x68/0x198
+[   97.302746]  el0_svc_common.constprop.0+0x11c/0x150
+[   97.303569]  do_el0_svc+0x38/0x50
+[   97.304164]  el0_svc+0x44/0x1d8
+[   97.304749]  el0t_64_sync_handler+0x100/0x130
+[   97.305500]  el0t_64_sync+0x188/0x190
+[   97.306151]
+[   97.306475] The buggy address belongs to stack of task 1.sh/2550
+[   97.307461]  and is located at offset 0 in frame:
+[   97.308257]  __se_sys_clone+0x0/0x138
+[   97.308910]
+[   97.309241] This frame has 1 object:
+[   97.309873]  [48, 184) 'args'
+[   97.309876]
+[   97.310749] The buggy address belongs to the virtual mapping at
+[   97.310749]  [ffff800089270000, ffff800089279000) created by:
+[   97.310749]  dup_task_struct+0xc0/0x2e8
+[   97.313347]
+[   97.313674] The buggy address belongs to the physical page:
+[   97.314604] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x14f69a
+[   97.315885] flags: 0x15ffffe00000000(node=1|zone=2|lastcpupid=0xfffff)
+[   97.316957] raw: 015ffffe00000000 0000000000000000 dead000000000122 0000000000000000
+[   97.318207] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+[   97.319445] page dumped because: kasan: bad access detected
+[   97.320371]
+[   97.320694] Memory state around the buggy address:
+[   97.321511]  ffff800089277b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[   97.322681]  ffff800089277b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[   97.323846] >ffff800089277c00: 00 00 f1 f1 f1 f1 f1 f1 00 00 00 00 00 00 00 00
+[   97.325023]                          ^
+[   97.325683]  ffff800089277c80: 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 f3
+[   97.326856]  ffff800089277d00: f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+
+This issue seems to be related to the behavior of some gcc compilers and
+was also fixed on the s390 architecture before:
+
+ commit d93a855c31b7 ("s390/ptrace: Avoid KASAN false positives in regs_get_kernel_stack_nth()")
+
+As described in that commit, regs_get_kernel_stack_nth() has confirmed that
+`addr` is on the stack, so reading the value at `*addr` should be allowed.
+Use READ_ONCE_NOCHECK() helper to silence the KASAN check for this case.
+
+Fixes: 0a8ea52c3eb1 ("arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature")
+Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
+Link: https://lore.kernel.org/r/20250604005533.1278992-1-wutengda@huaweicloud.com
+[will: Use '*addr' as the argument to READ_ONCE_NOCHECK()]
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/kernel/ptrace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
+index 8a95a013dfd3c..8fcf03968f111 100644
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -140,7 +140,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+ 
+       addr += n;
+       if (regs_within_kernel_stack(regs, (unsigned long)addr))
+-              return *addr;
++              return READ_ONCE_NOCHECK(*addr);
+       else
+               return 0;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.4/perf-fix-sample-vs-do_exit.patch b/queue-5.4/perf-fix-sample-vs-do_exit.patch

new file mode 100644 (file)

index 0000000..ba92444
--- /dev/null
+++ b/queue-5.4/perf-fix-sample-vs-do_exit.patch
@@ -0,0 +1,102 @@
+From 84bb7bcb3aa6c702215cd9c90911e80199cd09fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jun 2025 12:31:45 +0200
+Subject: perf: Fix sample vs do_exit()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 4f6fc782128355931527cefe3eb45338abd8ab39 ]
+
+Baisheng Gao reported an ARM64 crash, which Mark decoded as being a
+synchronous external abort -- most likely due to trying to access
+MMIO in bad ways.
+
+The crash further shows perf trying to do a user stack sample while in
+exit_mmap()'s tlb_finish_mmu() -- i.e. while tearing down the address
+space it is trying to access.
+
+It turns out that we stop perf after we tear down the userspace mm; a
+recipe for disaster, since perf likes to access userspace for
+various reasons.
+
+Flip this order by moving up where we stop perf in do_exit().
+
+Additionally, harden PERF_SAMPLE_CALLCHAIN and PERF_SAMPLE_STACK_USER
+to abort when the current task does not have an mm (exit_mm() makes
+sure to set current->mm = NULL; before commencing with the actual
+teardown), so that CPU-wide events don't trip on this same problem.
+
+Fixes: c5ebcedb566e ("perf: Add ability to attach user stack dump to sample")
+Reported-by: Baisheng Gao <baisheng.gao@unisoc.com>
+Suggested-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20250605110815.GQ39944@noisy.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c |  7 +++++++
+ kernel/exit.c        | 17 +++++++++--------
+ 2 files changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 7b97be4ed9d00..ecae7c7f895b9 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -6219,6 +6219,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size,
+       if (!regs)
+               return 0;
+ 
++      /* No mm, no stack, no dump. */
++      if (!current->mm)
++              return 0;
++
+       /*
+        * Check if we fit in with the requested stack size into the:
+        * - TASK_SIZE
+@@ -6687,6 +6691,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
+       const u32 max_stack = event->attr.sample_max_stack;
+       struct perf_callchain_entry *callchain;
+ 
++      if (!current->mm)
++              user = false;
++
+       if (!kernel && !user)
+               return &__empty_callchain;
+ 
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 56d3a099825fb..8cb4a82c4ed3a 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -844,6 +844,15 @@ void __noreturn do_exit(long code)
+       tsk->exit_code = code;
+       taskstats_exit(tsk, group_dead);
+ 
++      /*
++       * Since sampling can touch ->mm, make sure to stop everything before we
++       * tear it down.
++       *
++       * Also flushes inherited counters to the parent - before the parent
++       * gets woken up by child-exit notifications.
++       */
++      perf_event_exit_task(tsk);
++
+       exit_mm();
+ 
+       if (group_dead)
+@@ -861,14 +870,6 @@ void __noreturn do_exit(long code)
+       exit_thread(tsk);
+       exit_umh(tsk);
+ 
+-      /*
+-       * Flush inherited counters to the parent - before the parent
+-       * gets woken up by child-exit notifications.
+-       *
+-       * because of cgroup mode, must be called before cgroup_exit()
+-       */
+-      perf_event_exit_task(tsk);
+-
+       sched_autogroup_exit_task(tsk);
+       cgroup_exit(tsk);
+ 
+-- 
+2.39.5
+
diff --git a/queue-5.4/series b/queue-5.4/series

index 9fd9a88c5e80f7e9e4f5f2e781437383526cb513..2a4b1d452a61943abc1f7bbc437093f2ed7f2da5 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -218,3 +218,5 @@ mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch
  jbd2-fix-data-race-and-null-ptr-deref-in-jbd2_journal_dirty_metadata.patch
  rtc-test-fix-invalid-format-specifier.patch
  s390-pci-fix-__pcilg_mio_inuser-inline-assembly.patch
+perf-fix-sample-vs-do_exit.patch
+arm64-ptrace-fix-stack-out-of-bounds-read-in-regs_ge.patch
author	Sasha Levin <sashal@kernel.org>
	Mon, 23 Jun 2025 12:42:57 +0000 (08:42 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Mon, 23 Jun 2025 12:43:54 +0000 (08:43 -0400)
queue-5.4/arm64-ptrace-fix-stack-out-of-bounds-read-in-regs_ge.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/perf-fix-sample-vs-do_exit.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history