]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
authorSasha Levin <sashal@kernel.org>
Mon, 1 Jul 2024 00:09:14 +0000 (20:09 -0400)
committerSasha Levin <sashal@kernel.org>
Mon, 1 Jul 2024 00:09:14 +0000 (20:09 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/series
queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch [new file with mode: 0644]

index 1876efe257dd506aa9d0aee837ec90d91c6e54aa..22e3487c706f76ae1b7b58c1058fb39809f2aa8c 100644 (file)
@@ -202,3 +202,4 @@ media-dvbdev-initialize-sbuf.patch
 soc-ti-wkup_m3_ipc-send-null-dummy-message-instead-o.patch
 nvme-fixup-comment-for-nvme-rdma-provider-type.patch
 gpio-davinci-validate-the-obtained-number-of-irqs.patch
+x86-stop-playing-stack-games-in-profile_pc.patch
diff --git a/queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch b/queue-5.4/x86-stop-playing-stack-games-in-profile_pc.patch
new file mode 100644 (file)
index 0000000..5a733fd
--- /dev/null
@@ -0,0 +1,95 @@
+From 5fe8e216748753e8b49802aadeaf00ab4bf9fdd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:27:22 -0700
+Subject: x86: stop playing stack games in profile_pc()
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit 093d9603b60093a9aaae942db56107f6432a5dca ]
+
+The 'profile_pc()' function is used for timer-based profiling, which
+isn't really all that relevant any more to begin with, but it also ends
+up making assumptions based on the stack layout that aren't necessarily
+valid.
+
+Basically, the code tries to account the time spent in spinlocks to the
+caller rather than the spinlock, and while I support that as a concept,
+it's not worth the code complexity or the KASAN warnings when no serious
+profiling is done using timers anyway these days.
+
+And the code really does depend on stack layout that is only true in the
+simplest of cases.  We've lost the comment at some point (I think when
+the 32-bit and 64-bit code was unified), but it used to say:
+
+       Assume the lock function has either no stack frame or a copy
+       of eflags from PUSHF.
+
+which explains why it just blindly loads a word or two straight off the
+stack pointer and then takes a minimal look at the values to just check
+if they might be eflags or the return pc:
+
+       Eflags always has bits 22 and up cleared unlike kernel addresses
+
+but that basic stack layout assumption assumes that there isn't any lock
+debugging etc going on that would complicate the code and cause a stack
+frame.
+
+It causes KASAN unhappiness reported for years by syzkaller [1] and
+others [2].
+
+With no real practical reason for this any more, just remove the code.
+
+Just for historical interest, here's some background commits relating to
+this code from 2006:
+
+  0cb91a229364 ("i386: Account spinlocks to the caller during profiling for !FP kernels")
+  31679f38d886 ("Simplify profile_pc on x86-64")
+
+and a code unification from 2009:
+
+  ef4512882dbe ("x86: time_32/64.c unify profile_pc")
+
+but the basics of this thing actually goes back to before the git tree.
+
+Link: https://syzkaller.appspot.com/bug?extid=84fe685c02cd112a2ac3 [1]
+Link: https://lore.kernel.org/all/CAK55_s7Xyq=nh97=K=G1sxueOFrJDAvPOJAL4TPTCAYvmxO9_A@mail.gmail.com/ [2]
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/time.c | 20 +-------------------
+ 1 file changed, 1 insertion(+), 19 deletions(-)
+
+diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
+index 36a585b80d9e3..d4352ae0deb3b 100644
+--- a/arch/x86/kernel/time.c
++++ b/arch/x86/kernel/time.c
+@@ -27,25 +27,7 @@
+ unsigned long profile_pc(struct pt_regs *regs)
+ {
+-      unsigned long pc = instruction_pointer(regs);
+-
+-      if (!user_mode(regs) && in_lock_functions(pc)) {
+-#ifdef CONFIG_FRAME_POINTER
+-              return *(unsigned long *)(regs->bp + sizeof(long));
+-#else
+-              unsigned long *sp = (unsigned long *)regs->sp;
+-              /*
+-               * Return address is either directly at stack pointer
+-               * or above a saved flags. Eflags has bits 22-31 zero,
+-               * kernel addresses don't.
+-               */
+-              if (sp[0] >> 22)
+-                      return sp[0];
+-              if (sp[1] >> 22)
+-                      return sp[1];
+-#endif
+-      }
+-      return pc;
++      return instruction_pointer(regs);
+ }
+ EXPORT_SYMBOL(profile_pc);
+-- 
+2.43.0
+