git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 12 Jun 2024 13:56:50 +0000 (15:56 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 12 Jun 2024 13:56:50 +0000 (15:56 +0200)
added patches:
x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch

queue-5.4/series
queue-5.4/x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch [new file with mode: 0644]

diff --git a/queue-5.4/series b/queue-5.4/series
index 69d098e4e9600c7ccae01d17e8be6525e613eeb5..9dd5437e45709f8e2dcacda74a4e26345ed22dd2 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -168,3 +168,4 @@ io_uring-fail-nop-if-non-zero-op-flags-is-passed-in.patch
 afs-don-t-cross-.backup-mountpoint-from-backup-volume.patch
 nilfs2-fix-use-after-free-of-timer-for-log-writer-thread.patch
 vxlan-fix-regression-when-dropping-packets-due-to-invalid-src-addresses.patch
+x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch
diff --git a/queue-5.4/x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch b/queue-5.4/x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch
new file mode 100644
index 0000000..b3ab6fe
--- /dev/null
+++ b/queue-5.4/x86-mm-remove-broken-vsyscall-emulation-code-from-the-page-fault-code.patch
@@ -0,0 +1,225 @@
+From 02b670c1f88e78f42a6c5aee155c7b26960ca054 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Mon, 29 Apr 2024 10:00:51 +0200
+Subject: x86/mm: Remove broken vsyscall emulation code from the page fault code
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 02b670c1f88e78f42a6c5aee155c7b26960ca054 upstream.
+
+The syzbot-reported stack trace from hell in this discussion thread
+actually has three nested page faults:
+
+  https://lore.kernel.org/r/000000000000d5f4fc0616e816d4@google.com
+
+... and I think that's actually the important thing here:
+
+ - the first page fault is from user space, and triggers the vsyscall
+   emulation.
+
+ - the second page fault is from __do_sys_gettimeofday(), and that should
+   just have caused the exception that then sets the return value to
+   -EFAULT
+
+ - the third nested page fault is due to _raw_spin_unlock_irqrestore() ->
+   preempt_schedule() -> trace_sched_switch(), which then causes a BPF
+   trace program to run, which does that bpf_probe_read_compat(), which
+   causes that page fault under pagefault_disable().
+
+It's quite the nasty backtrace, and there's a lot going on.
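+
+[ Editor's illustration, not from the original report: a minimal
+  userspace sketch of the shape of the first two faults above. It
+  assumes x86-64 and a kernel booted with vsyscall=emulate;
+  0xffffffffff600000 is the legacy fixed-address vsyscall
+  gettimeofday() entry. The mmap()/munmap() pair manufactures a user
+  address that passes access_ok() in the kernel but is not mapped, so
+  the emulated gettimeofday() faults while storing the result. ]
+
+  #include <stdio.h>
+  #include <sys/mman.h>
+
+  int main(void)
+  {
+          long (*vgtod)(void *tv, void *tz) =
+                  (long (*)(void *, void *))0xffffffffff600000UL;
+          void *p;
+          long ret;
+
+          /* A user address that is valid-looking but unmapped. */
+          p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+          if (p == MAP_FAILED)
+                  return 1;
+          munmap(p, 4096);
+
+          /* Expected to end in SIGSEGV both before and after this
+           * patch; only the in-kernel path that generates the signal
+           * changes. */
+          ret = vgtod(p, NULL);
+          printf("vsyscall gettimeofday returned %ld\n", ret);
+          return 0;
+  }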
+
+The problem is literally the vsyscall emulation, which sets
+
+        current->thread.sig_on_uaccess_err = 1;
+
+and that causes the fixup_exception() code to send the signal *despite* the
+exception being caught.
+
+And I think that is in fact completely bogus.  It's completely bogus
+exactly because it sends that signal even when it *shouldn't* be sent -
+like for the BPF user mode trace gathering.
+
+In other words, I think the whole "sig_on_uaccess_err" thing is entirely
+broken, because it makes any nested page-faults do all the wrong things.
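+
+[ Editor's illustration: a self-contained userspace toy model of that
+  breakage (not kernel code; all names below are simplified stand-ins
+  for current->thread.sig_on_uaccess_err, the no_context() fixup path
+  and bpf_probe_read_compat()). While the emulation has the per-thread
+  flag set, an unrelated nested fault that merely gets fixed up also
+  ends up signalling; with the patch below the flag is gone and only
+  emulate_vsyscall() itself decides to send SIGSEGV. ]
+
+  #include <stdio.h>
+
+  /* Stand-in for current->thread.sig_on_uaccess_err. */
+  static int sig_on_uaccess_err;
+
+  /* Stand-in for the old fixup path: any fixed-up kernel fault also
+   * raises a signal whenever the flag is set, no matter which code
+   * actually faulted. */
+  static void fixup_kernel_fault(const char *who)
+  {
+          if (sig_on_uaccess_err)
+                  printf("SIGSEGV delivered for: %s\n", who);
+  }
+
+  /* Stand-in for bpf_probe_read_compat() faulting under
+   * pagefault_disable(): this should only make the read fail. */
+  static void unrelated_bpf_probe_read(void)
+  {
+          fixup_kernel_fault("unrelated BPF probe read (bogus!)");
+  }
+
+  /* Stand-in for the old emulate_vsyscall() around the real syscall. */
+  static void old_emulate_vsyscall(void)
+  {
+          int prev = sig_on_uaccess_err;
+
+          sig_on_uaccess_err = 1;
+          /* The emulated gettimeofday() faults on the bad pointer... */
+          fixup_kernel_fault("vsyscall uaccess (intended)");
+          /* ...and an unrelated trace/BPF fault nests inside it: */
+          unrelated_bpf_probe_read();
+          sig_on_uaccess_err = prev;
+  }
+
+  int main(void)
+  {
+          old_emulate_vsyscall();    /* prints two signals, one bogus */
+          return 0;
+  }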
+
+Now, arguably, I don't think anybody should enable vsyscall emulation any
+more, but this test case clearly does.
+
+I think we should just make the "send SIGSEGV" be something that the
+vsyscall emulation does on its own, not this broken per-thread state for
+something that isn't actually per thread.
+
+The x86 page fault code actually tried to deal with the "incorrect nesting"
+by having that:
+
+                if (in_interrupt())
+                        return;
+
+which ignores the sig_on_uaccess_err case when it happens in interrupts,
+but as shown by this example, these nested page faults do not need to be
+about interrupts at all.
+
+IOW, I think the only right thing is to remove that horrendously broken
+code.
+
+The attached patch looks like the ObviouslyCorrect(tm) thing to do.
+
+NOTE! This broken code goes back to this commit in 2011:
+
+  4fc3490114bb ("x86-64: Set siginfo and context on vsyscall emulation faults")
+
+... and back then the reason was to get all the siginfo details right.
+Honestly, I do not for a moment believe that it's worth getting the siginfo
+details right here, but part of the commit says:
+
+    This fixes issues with UML when vsyscall=emulate.
+
+... and so my patch to remove this garbage will probably break UML in this
+situation.
+
+I do not believe that anybody should be running with vsyscall=emulate in
+2024 in the first place, much less if you are doing things like UML. But
+let's see if somebody screams.
+
+Reported-and-tested-by: syzbot+83e7f982ca045ab4405c@syzkaller.appspotmail.com
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Jiri Olsa <jolsa@kernel.org>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Link: https://lore.kernel.org/r/CAHk-=wh9D6f7HUkDgZHKmDCHUQmp+Co89GP+b8+z+G56BKeyNg@mail.gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+[gpiccoli: Backported the patch due to differences between the trees. The main
+ differences between 5.4.y and 5.15.y come from the renaming of the fixup
+ function by commit 6456a2a69ee1 ("x86/fault: Rename no_context() to
+ kernelmode_fixup_or_oops()") and from the processor.h thread_struct changes
+ made by commit cf122cfba5b1 ("kill uaccess_try()"). The following two commits
+ also cause divergence in the diffs (in the removed lines):
+ cd072dab453a ("x86/fault: Add a helper function to sanitize error code")
+ d4ffd5df9d18 ("x86/fault: Fix wrong signal when vsyscall fails with pkey").]
+Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c |   28 ++--------------------------
+ arch/x86/include/asm/processor.h      |    1 -
+ arch/x86/mm/fault.c                   |   27 +--------------------------
+ 3 files changed, 3 insertions(+), 53 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
+ static bool write_ok_or_segv(unsigned long ptr, size_t size)
+ {
+-      /*
+-       * XXX: if access_ok, get_user, and put_user handled
+-       * sig_on_uaccess_err, this could go away.
+-       */
+-
+       if (!access_ok((void __user *)ptr, size)) {
+               struct thread_struct *thread = &current->thread;
+@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
+ bool emulate_vsyscall(unsigned long error_code,
+                     struct pt_regs *regs, unsigned long address)
+ {
+-      struct task_struct *tsk;
+       unsigned long caller;
+       int vsyscall_nr, syscall_nr, tmp;
+-      int prev_sig_on_uaccess_err;
+       long ret;
+       unsigned long orig_dx;
+@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
+               goto sigsegv;
+       }
+-      tsk = current;
+-
+       /*
+        * Check for access_ok violations and find the syscall nr.
+        *
+@@ -233,12 +224,8 @@ bool emulate_vsyscall(unsigned long error_code,
+               goto do_ret;  /* skip requested */
+       /*
+-       * With a real vsyscall, page faults cause SIGSEGV.  We want to
+-       * preserve that behavior to make writing exploits harder.
++       * With a real vsyscall, page faults cause SIGSEGV.
+        */
+-      prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
+-      current->thread.sig_on_uaccess_err = 1;
+-
+       ret = -EFAULT;
+       switch (vsyscall_nr) {
+       case 0:
+@@ -261,23 +248,12 @@ bool emulate_vsyscall(unsigned long error_code,
+               break;
+       }
+-      current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
+-
+ check_fault:
+       if (ret == -EFAULT) {
+               /* Bad news -- userspace fed a bad pointer to a vsyscall. */
+               warn_bad_vsyscall(KERN_INFO, regs,
+                                 "vsyscall fault (exploit attempt?)");
+-
+-              /*
+-               * If we failed to generate a signal for any reason,
+-               * generate one here.  (This should be impossible.)
+-               */
+-              if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+-                               !sigismember(&tsk->pending.signal, SIGSEGV)))
+-                      goto sigsegv;
+-
+-              return true;  /* Don't emulate the ret. */
++              goto sigsegv;
+       }
+       regs->ax = ret;
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -487,7 +487,6 @@ struct thread_struct {
+       mm_segment_t            addr_limit;
+-      unsigned int            sig_on_uaccess_err:1;
+       unsigned int            uaccess_err:1;  /* uaccess failed */
+       /* Floating point and extended processor state */
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -743,33 +743,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
+       }
+       /* Are we prepared to handle this kernel fault? */
+-      if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
+-              /*
+-               * Any interrupt that takes a fault gets the fixup. This makes
+-               * the below recursive fault logic only apply to a faults from
+-               * task context.
+-               */
+-              if (in_interrupt())
+-                      return;
+-
+-              /*
+-               * Per the above we're !in_interrupt(), aka. task context.
+-               *
+-               * In this case we need to make sure we're not recursively
+-               * faulting through the emulate_vsyscall() logic.
+-               */
+-              if (current->thread.sig_on_uaccess_err && signal) {
+-                      set_signal_archinfo(address, error_code);
+-
+-                      /* XXX: hwpoison faults will set the wrong code. */
+-                      force_sig_fault(signal, si_code, (void __user *)address);
+-              }
+-
+-              /*
+-               * Barring that, we can do the fixup and be happy.
+-               */
++      if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
+               return;
+-      }
+ #ifdef CONFIG_VMAP_STACK
+       /*