From 25cc05be96ecac06251119c8ee30f072336de2ee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 8 Apr 2024 11:45:51 +0200 Subject: [PATCH] 6.6-stable patches added patches: perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch --- ...ebs_data_cfg-for-the-last-pebs-event.patch | 83 ++++++++++ queue-6.6/series | 3 + ...ding-rng-with-rdrand-on-coco-systems.patch | 153 ++++++++++++++++++ ...-to-grab-mce_sysfs_mutex-in-set_bank.patch | 67 ++++++++ 4 files changed, 306 insertions(+) create mode 100644 queue-6.6/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch create mode 100644 queue-6.6/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch create mode 100644 queue-6.6/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch diff --git a/queue-6.6/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch b/queue-6.6/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch new file mode 100644 index 00000000000..521c9f75cfd --- /dev/null +++ b/queue-6.6/perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch @@ -0,0 +1,83 @@ +From 312be9fc2234c8acfb8148a9f4c358b70d358dee Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 1 Apr 2024 06:33:20 -0700 +Subject: perf/x86/intel/ds: Don't clear ->pebs_data_cfg for the last PEBS event + +From: Kan Liang + +commit 312be9fc2234c8acfb8148a9f4c358b70d358dee upstream. + +The MSR_PEBS_DATA_CFG MSR register is used to configure which data groups +should be generated into a PEBS record, and it's shared among all counters. + +If there are different configurations among counters, perf combines all the +configurations. + +The first perf command as below requires a complete PEBS record +(including memory info, GPRs, XMMs, and LBRs). The second perf command +only requires a basic group. However, after the second perf command is +running, the MSR_PEBS_DATA_CFG register is cleared. Only a basic group is +generated in a PEBS record, which is wrong. The required information +for the first perf command is missed. + + $ perf record --intr-regs=AX,SP,XMM0 -a -C 8 -b -W -d -c 100000003 -o /dev/null -e cpu/event=0xd0,umask=0x81/upp & + $ sleep 5 + $ perf record --per-thread -c 1 -e cycles:pp --no-timestamp --no-tid taskset -c 8 ./noploop 1000 + +The first PEBS event is a system-wide PEBS event. The second PEBS event +is a per-thread event. When the thread is scheduled out, the +intel_pmu_pebs_del() function is invoked to update the PEBS state. +Since the system-wide event is still available, the cpuc->n_pebs is 1. +The cpuc->pebs_data_cfg is cleared. The data configuration for the +system-wide PEBS event is lost. + +The (cpuc->n_pebs == 1) check was introduced in commit: + + b6a32f023fcc ("perf/x86: Fix PEBS threshold initialization") + +At that time, it indeed didn't hurt whether the state was updated +during the removal, because only the threshold is updated. + +The calculation of the threshold takes the last PEBS event into +account. + +However, since commit: + + b752ea0c28e3 ("perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG") + +we delay the threshold update, and clear the PEBS data config, which triggers +the bug. + +The PEBS data config update scope should not be shrunk during removal. + +[ mingo: Improved the changelog & comments. ] + +Fixes: b752ea0c28e3 ("perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG") +Reported-by: Stephane Eranian +Signed-off-by: Kan Liang +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240401133320.703971-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/ds.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct + struct pmu *pmu = event->pmu; + + /* +- * Make sure we get updated with the first PEBS +- * event. It will trigger also during removal, but +- * that does not hurt: ++ * Make sure we get updated with the first PEBS event. ++ * During removal, ->pebs_data_cfg is still valid for ++ * the last PEBS event. Don't clear it. + */ +- if (cpuc->n_pebs == 1) ++ if ((cpuc->n_pebs == 1) && add) + cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; + + if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/queue-6.6/series b/queue-6.6/series index 6ec888ceb46..fede015b918 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -214,4 +214,7 @@ io_uring-kbuf-hold-io_buffer_list-reference-over-mmap.patch driver-core-introduce-device_link_wait_removal.patch of-dynamic-synchronize-of_changeset_destroy-with-the-devlink-removals.patch x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch +x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch +x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch +perf-x86-intel-ds-don-t-clear-pebs_data_cfg-for-the-last-pebs-event.patch of-module-prevent-null-pointer-dereference-in-vsnprintf.patch diff --git a/queue-6.6/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch b/queue-6.6/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch new file mode 100644 index 00000000000..7d22e73f127 --- /dev/null +++ b/queue-6.6/x86-coco-require-seeding-rng-with-rdrand-on-coco-systems.patch @@ -0,0 +1,153 @@ +From 99485c4c026f024e7cb82da84c7951dbe3deb584 Mon Sep 17 00:00:00 2001 +From: "Jason A. Donenfeld" +Date: Tue, 26 Mar 2024 17:07:35 +0100 +Subject: x86/coco: Require seeding RNG with RDRAND on CoCo systems + +From: Jason A. Donenfeld + +commit 99485c4c026f024e7cb82da84c7951dbe3deb584 upstream. + +There are few uses of CoCo that don't rely on working cryptography and +hence a working RNG. Unfortunately, the CoCo threat model means that the +VM host cannot be trusted and may actively work against guests to +extract secrets or manipulate computation. Since a malicious host can +modify or observe nearly all inputs to guests, the only remaining source +of entropy for CoCo guests is RDRAND. + +If RDRAND is broken -- due to CPU hardware fault -- the RNG as a whole +is meant to gracefully continue on gathering entropy from other sources, +but since there aren't other sources on CoCo, this is catastrophic. +This is mostly a concern at boot time when initially seeding the RNG, as +after that the consequences of a broken RDRAND are much more +theoretical. + +So, try at boot to seed the RNG using 256 bits of RDRAND output. If this +fails, panic(). This will also trigger if the system is booted without +RDRAND, as RDRAND is essential for a safe CoCo boot. + +Add this deliberately to be "just a CoCo x86 driver feature" and not +part of the RNG itself. Many device drivers and platforms have some +desire to contribute something to the RNG, and add_device_randomness() +is specifically meant for this purpose. + +Any driver can call it with seed data of any quality, or even garbage +quality, and it can only possibly make the quality of the RNG better or +have no effect, but can never make it worse. + +Rather than trying to build something into the core of the RNG, consider +the particular CoCo issue just a CoCo issue, and therefore separate it +all out into driver (well, arch/platform) code. + + [ bp: Massage commit message. ] + +Signed-off-by: Jason A. Donenfeld +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Elena Reshetova +Reviewed-by: Kirill A. Shutemov +Reviewed-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240326160735.73531-1-Jason@zx2c4.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/coco/core.c | 41 +++++++++++++++++++++++++++++++++++++++++ + arch/x86/include/asm/coco.h | 2 ++ + arch/x86/kernel/setup.c | 2 ++ + 3 files changed, 45 insertions(+) + +--- a/arch/x86/coco/core.c ++++ b/arch/x86/coco/core.c +@@ -3,13 +3,17 @@ + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. ++ * Copyright (C) 2024 Jason A. Donenfeld . All Rights Reserved. + * + * Author: Tom Lendacky + */ + + #include + #include ++#include ++#include + ++#include + #include + #include + +@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val) + } + } + EXPORT_SYMBOL_GPL(cc_mkdec); ++ ++__init void cc_random_init(void) ++{ ++ /* ++ * The seed is 32 bytes (in units of longs), which is 256 bits, which ++ * is the security level that the RNG is targeting. ++ */ ++ unsigned long rng_seed[32 / sizeof(long)]; ++ size_t i, longs; ++ ++ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) ++ return; ++ ++ /* ++ * Since the CoCo threat model includes the host, the only reliable ++ * source of entropy that can be neither observed nor manipulated is ++ * RDRAND. Usually, RDRAND failure is considered tolerable, but since ++ * CoCo guests have no other unobservable source of entropy, it's ++ * important to at least ensure the RNG gets some initial random seeds. ++ */ ++ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { ++ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); ++ ++ /* ++ * A zero return value means that the guest doesn't have RDRAND ++ * or the CPU is physically broken, and in both cases that ++ * means most crypto inside of the CoCo instance will be ++ * broken, defeating the purpose of CoCo in the first place. So ++ * just panic here because it's absolutely unsafe to continue ++ * executing. ++ */ ++ if (longs == 0) ++ panic("RDRAND is defective."); ++ } ++ add_device_randomness(rng_seed, sizeof(rng_seed)); ++ memzero_explicit(rng_seed, sizeof(rng_seed)); ++} +--- a/arch/x86/include/asm/coco.h ++++ b/arch/x86/include/asm/coco.h +@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) + + u64 cc_mkenc(u64 val); + u64 cc_mkdec(u64 val); ++void cc_random_init(void); + #else + static inline u64 cc_mkenc(u64 val) + { +@@ -32,6 +33,7 @@ static inline u64 cc_mkdec(u64 val) + { + return val; + } ++static inline void cc_random_init(void) { } + #endif + + #endif /* _ASM_X86_COCO_H */ +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1120,6 +1121,7 @@ void __init setup_arch(char **cmdline_p) + * memory size. + */ + sev_setup_arch(); ++ cc_random_init(); + + efi_fake_memmap(); + efi_find_mirror(); diff --git a/queue-6.6/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch b/queue-6.6/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch new file mode 100644 index 00000000000..e1993443ae3 --- /dev/null +++ b/queue-6.6/x86-mce-make-sure-to-grab-mce_sysfs_mutex-in-set_bank.patch @@ -0,0 +1,67 @@ +From 3ddf944b32f88741c303f0b21459dbb3872b8bc5 Mon Sep 17 00:00:00 2001 +From: "Borislav Petkov (AMD)" +Date: Wed, 13 Mar 2024 14:48:27 +0100 +Subject: x86/mce: Make sure to grab mce_sysfs_mutex in set_bank() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Borislav Petkov (AMD) + +commit 3ddf944b32f88741c303f0b21459dbb3872b8bc5 upstream. + +Modifying a MCA bank's MCA_CTL bits which control which error types to +be reported is done over + + /sys/devices/system/machinecheck/ + ├── machinecheck0 + │   ├── bank0 + │   ├── bank1 + │   ├── bank10 + │   ├── bank11 + ... + +sysfs nodes by writing the new bit mask of events to enable. + +When the write is accepted, the kernel deletes all current timers and +reinits all banks. + +Doing that in parallel can lead to initializing a timer which is already +armed and in the timer wheel, i.e., in use already: + + ODEBUG: init active (active state 0) object: ffff888063a28000 object + type: timer_list hint: mce_timer_fn+0x0/0x240 arch/x86/kernel/cpu/mce/core.c:2642 + WARNING: CPU: 0 PID: 8120 at lib/debugobjects.c:514 + debug_print_object+0x1a0/0x2a0 lib/debugobjects.c:514 + +Fix that by grabbing the sysfs mutex as the rest of the MCA sysfs code +does. + +Reported by: Yue Sun +Reported by: xingwei lee +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/r/CAEkJfYNiENwQY8yV1LYJ9LjJs%2Bx_-PqMv98gKig55=2vbzffRw@mail.gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mce/core.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -2468,12 +2468,14 @@ static ssize_t set_bank(struct device *s + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; +- + if (!b->init) + return -ENODEV; + + b->ctl = new; ++ ++ mutex_lock(&mce_sysfs_mutex); + mce_restart(); ++ mutex_unlock(&mce_sysfs_mutex); + + return size; + } -- 2.47.3