]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.8-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Apr 2024 09:46:29 +0000 (11:46 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Apr 2024 09:46:29 +0000 (11:46 +0200)
added patches:
aio-fix-null-ptr-deref-in-aio_complete-wakeup.patch
perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch
riscv-fix-vector-state-restore-in-rt_sigreturn.patch
x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch
x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch

queue-6.8/aio-fix-null-ptr-deref-in-aio_complete-wakeup.patch [new file with mode: 0644]
queue-6.8/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch [new file with mode: 0644]
queue-6.8/riscv-fix-vector-state-restore-in-rt_sigreturn.patch [new file with mode: 0644]
queue-6.8/series
queue-6.8/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch [new file with mode: 0644]
queue-6.8/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch [new file with mode: 0644]

diff --git a/queue-6.8/aio-fix-null-ptr-deref-in-aio_complete-wakeup.patch b/queue-6.8/aio-fix-null-ptr-deref-in-aio_complete-wakeup.patch
new file mode 100644 (file)
index 0000000..ea7273f
--- /dev/null
@@ -0,0 +1,41 @@
+From caeb4b0a11b3393e43f7fa8e0a5a18462acc66bd Mon Sep 17 00:00:00 2001
+From: Kent Overstreet <kent.overstreet@linux.dev>
+Date: Sun, 31 Mar 2024 17:52:12 -0400
+Subject: aio: Fix null ptr deref in aio_complete() wakeup
+
+From: Kent Overstreet <kent.overstreet@linux.dev>
+
+commit caeb4b0a11b3393e43f7fa8e0a5a18462acc66bd upstream.
+
+list_del_init_careful() needs to be the last access to the wait queue
+entry - it effectively unlocks access.
+
+Previously, finish_wait() would see the empty list head and skip taking
+the lock, and then we'd return - but the completion path would still
+attempt to do the wakeup after the task_struct pointer had been
+overwritten.
+
+Fixes: 71eb6b6b0ba9 ("fs/aio: obey min_nr when doing wakeups")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/linux-fsdevel/CAHTA-ubfwwB51A5Wg5M6H_rPEQK9pNf8FkAGH=vr=FEkyRrtqw@mail.gmail.com/
+Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
+Link: https://lore.kernel.org/stable/20240331215212.522544-1-kent.overstreet%40linux.dev
+Link: https://lore.kernel.org/r/20240331215212.522544-1-kent.overstreet@linux.dev
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kioc
+               spin_lock_irqsave(&ctx->wait.lock, flags);
+               list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
+                       if (avail >= curr->min_nr) {
+-                              list_del_init_careful(&curr->w.entry);
+                               wake_up_process(curr->w.private);
++                              list_del_init_careful(&curr->w.entry);
+                       }
+               spin_unlock_irqrestore(&ctx->wait.lock, flags);
+       }
diff --git a/queue-6.8/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch b/queue-6.8/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch
new file mode 100644 (file)
index 0000000..521c9f7
--- /dev/null
@@ -0,0 +1,83 @@
+From 312be9fc2234c8acfb8148a9f4c358b70d358dee Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 1 Apr 2024 06:33:20 -0700
+Subject: perf/x86/intel/ds: Don't clear ->pebs_data_cfg for the last PEBS event
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 312be9fc2234c8acfb8148a9f4c358b70d358dee upstream.
+
+The MSR_PEBS_DATA_CFG MSR register is used to configure which data groups
+should be generated into a PEBS record, and it's shared among all counters.
+
+If there are different configurations among counters, perf combines all the
+configurations.
+
+The first perf command as below requires a complete PEBS record
+(including memory info, GPRs, XMMs, and LBRs). The second perf command
+only requires a basic group. However, after the second perf command is
+running, the MSR_PEBS_DATA_CFG register is cleared. Only a basic group is
+generated in a PEBS record, which is wrong. The required information
+for the first perf command is missed.
+
+ $ perf record --intr-regs=AX,SP,XMM0 -a -C 8 -b -W -d -c 100000003 -o /dev/null -e cpu/event=0xd0,umask=0x81/upp &
+ $ sleep 5
+ $ perf record  --per-thread  -c 1  -e cycles:pp --no-timestamp --no-tid taskset -c 8 ./noploop 1000
+
+The first PEBS event is a system-wide PEBS event. The second PEBS event
+is a per-thread event. When the thread is scheduled out, the
+intel_pmu_pebs_del() function is invoked to update the PEBS state.
+Since the system-wide event is still available, the cpuc->n_pebs is 1.
+The cpuc->pebs_data_cfg is cleared. The data configuration for the
+system-wide PEBS event is lost.
+
+The (cpuc->n_pebs == 1) check was introduced in commit:
+
+  b6a32f023fcc ("perf/x86: Fix PEBS threshold initialization")
+
+At that time, it indeed didn't hurt whether the state was updated
+during the removal, because only the threshold is updated.
+
+The calculation of the threshold takes the last PEBS event into
+account.
+
+However, since commit:
+
+  b752ea0c28e3 ("perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG")
+
+we delay the threshold update, and clear the PEBS data config, which triggers
+the bug.
+
+The PEBS data config update scope should not be shrunk during removal.
+
+[ mingo: Improved the changelog & comments. ]
+
+Fixes: b752ea0c28e3 ("perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG")
+Reported-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240401133320.703971-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct
+       struct pmu *pmu = event->pmu;
+       /*
+-       * Make sure we get updated with the first PEBS
+-       * event. It will trigger also during removal, but
+-       * that does not hurt:
++       * Make sure we get updated with the first PEBS event.
++       * During removal, ->pebs_data_cfg is still valid for
++       * the last PEBS event. Don't clear it.
+        */
+-      if (cpuc->n_pebs == 1)
++      if ((cpuc->n_pebs == 1) && add)
+               cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
+       if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/queue-6.8/riscv-fix-vector-state-restore-in-rt_sigreturn.patch b/queue-6.8/riscv-fix-vector-state-restore-in-rt_sigreturn.patch
new file mode 100644 (file)
index 0000000..85b7127
--- /dev/null
@@ -0,0 +1,103 @@
+From c27fa53b858b4ee6552a719aa599c250cf98a586 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn@rivosinc.com>
+Date: Wed, 3 Apr 2024 09:26:38 +0200
+Subject: riscv: Fix vector state restore in rt_sigreturn()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+commit c27fa53b858b4ee6552a719aa599c250cf98a586 upstream.
+
+The RISC-V Vector specification states in "Appendix D: Calling
+Convention for Vector State" [1] that "Executing a system call causes
+all caller-saved vector registers (v0-v31, vl, vtype) and vstart to
+become unspecified.". In the RISC-V kernel this is called "discarding
+the vstate".
+
+Returning from a signal handler via the rt_sigreturn() syscall, vector
+discard is also performed. However, this is not an issue since the
+vector state should be restored from the sigcontext, and therefore not
+care about the vector discard.
+
+The "live state" is the actual vector register in the running context,
+and the "vstate" is the vector state of the task. A dirty live state,
+means that the vstate and live state are not in synch.
+
+When vectorized user_from_copy() was introduced, an bug sneaked in at
+the restoration code, related to the discard of the live state.
+
+An example when this go wrong:
+
+  1. A userland application is executing vector code
+  2. The application receives a signal, and the signal handler is
+     entered.
+  3. The application returns from the signal handler, using the
+     rt_sigreturn() syscall.
+  4. The live vector state is discarded upon entering the
+     rt_sigreturn(), and the live state is marked as "dirty", indicating
+     that the live state need to be synchronized with the current
+     vstate.
+  5. rt_sigreturn() restores the vstate, except the Vector registers,
+     from the sigcontext
+  6. rt_sigreturn() restores the Vector registers, from the sigcontext,
+     and now the vectorized user_from_copy() is used. The dirty live
+     state from the discard is saved to the vstate, making the vstate
+     corrupt.
+  7. rt_sigreturn() returns to the application, which crashes due to
+     corrupted vstate.
+
+Note that the vectorized user_from_copy() is invoked depending on the
+value of CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD. Default is 768, which
+means that vlen has to be larger than 128b for this bug to trigger.
+
+The fix is simply to mark the live state as non-dirty/clean prior
+performing the vstate restore.
+
+Link: https://github.com/riscv/riscv-isa-manual/releases/download/riscv-isa-release-8abdb41-2024-03-26/unpriv-isa-asciidoc.pdf # [1]
+Reported-by: Charlie Jenkins <charlie@rivosinc.com>
+Reported-by: Vineet Gupta <vgupta@kernel.org>
+Fixes: c2a658d41924 ("riscv: lib: vectorize copy_to_user/copy_from_user")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
+Tested-by: Vineet Gupta <vineetg@rivosinc.com>
+Link: https://lore.kernel.org/r/20240403072638.567446-1-bjorn@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/riscv/kernel/signal.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/riscv/kernel/signal.c
++++ b/arch/riscv/kernel/signal.c
+@@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_
+       struct __sc_riscv_v_state __user *state = sc_vec;
+       void __user *datap;
++      /*
++       * Mark the vstate as clean prior performing the actual copy,
++       * to avoid getting the vstate incorrectly clobbered by the
++       *  discarded vector state.
++       */
++      riscv_v_vstate_set_restore(current, regs);
++
+       /* Copy everything of __sc_riscv_v_state except datap. */
+       err = __copy_from_user(&current->thread.vstate, &state->v_state,
+                              offsetof(struct __riscv_v_ext_state, datap));
+@@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_
+        * Copy the whole vector content from user space datap. Use
+        * copy_from_user to prevent information leak.
+        */
+-      err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
+-      if (unlikely(err))
+-              return err;
+-
+-      riscv_v_vstate_set_restore(current, regs);
+-
+-      return err;
++      return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
+ }
+ #else
+ #define save_v_state(task, regs) (0)
index d954edb0e2d56fe3e4d6c407cf8a864b6103ba09..be1e576a04d9e1e768e22df0b81d139cbe537c46 100644 (file)
@@ -232,3 +232,8 @@ driver-core-introduce-device_link_wait_removal.patch
 of-dynamic-synchronize-of_changeset_destroy-with-the-devlink-removals.patch
 of-module-prevent-null-pointer-dereference-in-vsnprintf.patch
 x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch
+x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch
+x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch
+perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch
+aio-fix-null-ptr-deref-in-aio_complete-wakeup.patch
+riscv-fix-vector-state-restore-in-rt_sigreturn.patch
diff --git a/queue-6.8/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch b/queue-6.8/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch
new file mode 100644 (file)
index 0000000..493d360
--- /dev/null
@@ -0,0 +1,153 @@
+From 99485c4c026f024e7cb82da84c7951dbe3deb584 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Tue, 26 Mar 2024 17:07:35 +0100
+Subject: x86/coco: Require seeding RNG with RDRAND on CoCo systems
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 99485c4c026f024e7cb82da84c7951dbe3deb584 upstream.
+
+There are few uses of CoCo that don't rely on working cryptography and
+hence a working RNG. Unfortunately, the CoCo threat model means that the
+VM host cannot be trusted and may actively work against guests to
+extract secrets or manipulate computation. Since a malicious host can
+modify or observe nearly all inputs to guests, the only remaining source
+of entropy for CoCo guests is RDRAND.
+
+If RDRAND is broken -- due to CPU hardware fault -- the RNG as a whole
+is meant to gracefully continue on gathering entropy from other sources,
+but since there aren't other sources on CoCo, this is catastrophic.
+This is mostly a concern at boot time when initially seeding the RNG, as
+after that the consequences of a broken RDRAND are much more
+theoretical.
+
+So, try at boot to seed the RNG using 256 bits of RDRAND output. If this
+fails, panic(). This will also trigger if the system is booted without
+RDRAND, as RDRAND is essential for a safe CoCo boot.
+
+Add this deliberately to be "just a CoCo x86 driver feature" and not
+part of the RNG itself. Many device drivers and platforms have some
+desire to contribute something to the RNG, and add_device_randomness()
+is specifically meant for this purpose.
+
+Any driver can call it with seed data of any quality, or even garbage
+quality, and it can only possibly make the quality of the RNG better or
+have no effect, but can never make it worse.
+
+Rather than trying to build something into the core of the RNG, consider
+the particular CoCo issue just a CoCo issue, and therefore separate it
+all out into driver (well, arch/platform) code.
+
+  [ bp: Massage commit message. ]
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Elena Reshetova <elena.reshetova@intel.com>
+Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240326160735.73531-1-Jason@zx2c4.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/coco/core.c        |   41 +++++++++++++++++++++++++++++++++++++++++
+ arch/x86/include/asm/coco.h |    2 ++
+ arch/x86/kernel/setup.c     |    2 ++
+ 3 files changed, 45 insertions(+)
+
+--- a/arch/x86/coco/core.c
++++ b/arch/x86/coco/core.c
+@@ -3,13 +3,17 @@
+  * Confidential Computing Platform Capability checks
+  *
+  * Copyright (C) 2021 Advanced Micro Devices, Inc.
++ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+  *
+  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+  */
+ #include <linux/export.h>
+ #include <linux/cc_platform.h>
++#include <linux/string.h>
++#include <linux/random.h>
++#include <asm/archrandom.h>
+ #include <asm/coco.h>
+ #include <asm/processor.h>
+@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val)
+       }
+ }
+ EXPORT_SYMBOL_GPL(cc_mkdec);
++
++__init void cc_random_init(void)
++{
++      /*
++       * The seed is 32 bytes (in units of longs), which is 256 bits, which
++       * is the security level that the RNG is targeting.
++       */
++      unsigned long rng_seed[32 / sizeof(long)];
++      size_t i, longs;
++
++      if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
++              return;
++
++      /*
++       * Since the CoCo threat model includes the host, the only reliable
++       * source of entropy that can be neither observed nor manipulated is
++       * RDRAND. Usually, RDRAND failure is considered tolerable, but since
++       * CoCo guests have no other unobservable source of entropy, it's
++       * important to at least ensure the RNG gets some initial random seeds.
++       */
++      for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
++              longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
++
++              /*
++               * A zero return value means that the guest doesn't have RDRAND
++               * or the CPU is physically broken, and in both cases that
++               * means most crypto inside of the CoCo instance will be
++               * broken, defeating the purpose of CoCo in the first place. So
++               * just panic here because it's absolutely unsafe to continue
++               * executing.
++               */
++              if (longs == 0)
++                      panic("RDRAND is defective.");
++      }
++      add_device_randomness(rng_seed, sizeof(rng_seed));
++      memzero_explicit(rng_seed, sizeof(rng_seed));
++}
+--- a/arch/x86/include/asm/coco.h
++++ b/arch/x86/include/asm/coco.h
+@@ -21,6 +21,7 @@ static inline void cc_set_mask(u64 mask)
+ u64 cc_mkenc(u64 val);
+ u64 cc_mkdec(u64 val);
++void cc_random_init(void);
+ #else
+ #define cc_vendor (CC_VENDOR_NONE)
+@@ -33,6 +34,7 @@ static inline u64 cc_mkdec(u64 val)
+ {
+       return val;
+ }
++static inline void cc_random_init(void) { }
+ #endif
+ #endif /* _ASM_X86_COCO_H */
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -35,6 +35,7 @@
+ #include <asm/bios_ebda.h>
+ #include <asm/bugs.h>
+ #include <asm/cacheinfo.h>
++#include <asm/coco.h>
+ #include <asm/cpu.h>
+ #include <asm/efi.h>
+ #include <asm/gart.h>
+@@ -993,6 +994,7 @@ void __init setup_arch(char **cmdline_p)
+        * memory size.
+        */
+       mem_encrypt_setup_arch();
++      cc_random_init();
+       efi_fake_memmap();
+       efi_find_mirror();
diff --git a/queue-6.8/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch b/queue-6.8/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch
new file mode 100644 (file)
index 0000000..22657bf
--- /dev/null
@@ -0,0 +1,67 @@
+From 3ddf944b32f88741c303f0b21459dbb3872b8bc5 Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)" <bp@alien8.de>
+Date: Wed, 13 Mar 2024 14:48:27 +0100
+Subject: x86/mce: Make sure to grab mce_sysfs_mutex in set_bank()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Borislav Petkov (AMD) <bp@alien8.de>
+
+commit 3ddf944b32f88741c303f0b21459dbb3872b8bc5 upstream.
+
+Modifying a MCA bank's MCA_CTL bits which control which error types to
+be reported is done over
+
+  /sys/devices/system/machinecheck/
+  ├── machinecheck0
+  │   ├── bank0
+  │   ├── bank1
+  │   ├── bank10
+  │   ├── bank11
+  ...
+
+sysfs nodes by writing the new bit mask of events to enable.
+
+When the write is accepted, the kernel deletes all current timers and
+reinits all banks.
+
+Doing that in parallel can lead to initializing a timer which is already
+armed and in the timer wheel, i.e., in use already:
+
+  ODEBUG: init active (active state 0) object: ffff888063a28000 object
+  type: timer_list hint: mce_timer_fn+0x0/0x240 arch/x86/kernel/cpu/mce/core.c:2642
+  WARNING: CPU: 0 PID: 8120 at lib/debugobjects.c:514
+  debug_print_object+0x1a0/0x2a0 lib/debugobjects.c:514
+
+Fix that by grabbing the sysfs mutex as the rest of the MCA sysfs code
+does.
+
+Reported by: Yue Sun <samsun1006219@gmail.com>
+Reported by: xingwei lee <xrivendell7@gmail.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/CAEkJfYNiENwQY8yV1LYJ9LjJs%2Bx_-PqMv98gKig55=2vbzffRw@mail.gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mce/core.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -2474,12 +2474,14 @@ static ssize_t set_bank(struct device *s
+               return -EINVAL;
+       b = &per_cpu(mce_banks_array, s->id)[bank];
+-
+       if (!b->init)
+               return -ENODEV;
+       b->ctl = new;
++
++      mutex_lock(&mce_sysfs_mutex);
+       mce_restart();
++      mutex_unlock(&mce_sysfs_mutex);
+       return size;
+ }