From be6e80f287ec21a9f0ce59dd61410fba31d486da Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 30 Jun 2024 20:09:14 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- queue-5.4/series | 1 + ...op-playing-stack-games-in-profile_pc.patch | 95 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch diff --git a/queue-5.4/series b/queue-5.4/series index 1876efe257d..22e3487c706 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -202,3 +202,4 @@ media-dvbdev-initialize-sbuf.patch soc-ti-wkup_m3_ipc-send-null-dummy-message-instead-o.patch nvme-fixup-comment-for-nvme-rdma-provider-type.patch gpio-davinci-validate-the-obtained-number-of-irqs.patch +x86-stop-playing-stack-games-in-profile_pc.patch diff --git a/queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch b/queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch new file mode 100644 index 00000000000..5a733fd74f9 --- /dev/null +++ b/queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch @@ -0,0 +1,95 @@ +From 5fe8e216748753e8b49802aadeaf00ab4bf9fdd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jun 2024 14:27:22 -0700 +Subject: x86: stop playing stack games in profile_pc() + +From: Linus Torvalds + +[ Upstream commit 093d9603b60093a9aaae942db56107f6432a5dca ] + +The 'profile_pc()' function is used for timer-based profiling, which +isn't really all that relevant any more to begin with, but it also ends +up making assumptions based on the stack layout that aren't necessarily +valid. + +Basically, the code tries to account the time spent in spinlocks to the +caller rather than the spinlock, and while I support that as a concept, +it's not worth the code complexity or the KASAN warnings when no serious +profiling is done using timers anyway these days. + +And the code really does depend on stack layout that is only true in the +simplest of cases. We've lost the comment at some point (I think when +the 32-bit and 64-bit code was unified), but it used to say: + + Assume the lock function has either no stack frame or a copy + of eflags from PUSHF. + +which explains why it just blindly loads a word or two straight off the +stack pointer and then takes a minimal look at the values to just check +if they might be eflags or the return pc: + + Eflags always has bits 22 and up cleared unlike kernel addresses + +but that basic stack layout assumption assumes that there isn't any lock +debugging etc going on that would complicate the code and cause a stack +frame. + +It causes KASAN unhappiness reported for years by syzkaller [1] and +others [2]. + +With no real practical reason for this any more, just remove the code. + +Just for historical interest, here's some background commits relating to +this code from 2006: + + 0cb91a229364 ("i386: Account spinlocks to the caller during profiling for !FP kernels") + 31679f38d886 ("Simplify profile_pc on x86-64") + +and a code unification from 2009: + + ef4512882dbe ("x86: time_32/64.c unify profile_pc") + +but the basics of this thing actually goes back to before the git tree. + +Link: https://syzkaller.appspot.com/bug?extid=84fe685c02cd112a2ac3 [1] +Link: https://lore.kernel.org/all/CAK55_s7Xyq=nh97=K=G1sxueOFrJDAvPOJAL4TPTCAYvmxO9_A@mail.gmail.com/ [2] +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/time.c | 20 +------------------- + 1 file changed, 1 insertion(+), 19 deletions(-) + +diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c +index 36a585b80d9e3..d4352ae0deb3b 100644 +--- a/arch/x86/kernel/time.c ++++ b/arch/x86/kernel/time.c +@@ -27,25 +27,7 @@ + + unsigned long profile_pc(struct pt_regs *regs) + { +- unsigned long pc = instruction_pointer(regs); +- +- if (!user_mode(regs) && in_lock_functions(pc)) { +-#ifdef CONFIG_FRAME_POINTER +- return *(unsigned long *)(regs->bp + sizeof(long)); +-#else +- unsigned long *sp = (unsigned long *)regs->sp; +- /* +- * Return address is either directly at stack pointer +- * or above a saved flags. Eflags has bits 22-31 zero, +- * kernel addresses don't. +- */ +- if (sp[0] >> 22) +- return sp[0]; +- if (sp[1] >> 22) +- return sp[1]; +-#endif +- } +- return pc; ++ return instruction_pointer(regs); + } + EXPORT_SYMBOL(profile_pc); + +-- +2.43.0 + -- 2.47.3