From e62dbf4893082576ca8b00b454a05cd46a23766b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Jun 2024 15:12:47 +0200 Subject: [PATCH] 6.1-stable patches added patches: efi-libstub-only-free-priv.runtime_map-when-allocated.patch genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch --- ...free-priv.runtime_map-when-allocated.patch | 44 +++++++ ...event-vector-leak-during-cpu-offline.patch | 123 ++++++++++++++++++ ...xphyaddr-as-host.maxphyaddr-in-cpuid.patch | 74 +++++++++++ queue-6.1/series | 4 + ...kaslr-when-memory-reservations-exist.patch | 107 +++++++++++++++ 5 files changed, 352 insertions(+) create mode 100644 queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch create mode 100644 queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch create mode 100644 queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch create mode 100644 queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch diff --git a/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch b/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch new file mode 100644 index 00000000000..6cba49e95ce --- /dev/null +++ b/queue-6.1/efi-libstub-only-free-priv.runtime_map-when-allocated.patch @@ -0,0 +1,44 @@ +From 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 Mon Sep 17 00:00:00 2001 +From: Hagar Hemdan +Date: Tue, 23 Apr 2024 13:59:26 +0000 +Subject: efi: libstub: only free priv.runtime_map when allocated + +From: Hagar Hemdan + +commit 4b2543f7e1e6b91cfc8dd1696e3cdf01c3ac8974 upstream. + +priv.runtime_map is only allocated when efi_novamap is not set. +Otherwise, it is an uninitialized value. In the error path, it is freed +unconditionally. Avoid passing an uninitialized value to free_pool. 
+Free priv.runtime_map only when it was allocated. + +This bug was discovered and resolved using Coverity Static Analysis +Security Testing (SAST) by Synopsys, Inc. + +Fixes: f80d26043af9 ("efi: libstub: avoid efi_get_memory_map() for allocating the virt map") +Cc: stable@vger.kernel.org +Signed-off-by: Hagar Hemdan +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/fdt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c +index 70e9789ff9de..6a337f1f8787 100644 +--- a/drivers/firmware/efi/libstub/fdt.c ++++ b/drivers/firmware/efi/libstub/fdt.c +@@ -335,8 +335,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, + + fail: + efi_free(fdt_size, fdt_addr); +- +- efi_bs_call(free_pool, priv.runtime_map); ++ if (!efi_novamap) ++ efi_bs_call(free_pool, priv.runtime_map); + + return EFI_LOAD_ERROR; + } +-- +2.45.2 + diff --git a/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch b/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch new file mode 100644 index 00000000000..5a8ef5f22ff --- /dev/null +++ b/queue-6.1/genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch @@ -0,0 +1,123 @@ +From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001 +From: Dongli Zhang +Date: Wed, 22 May 2024 15:02:18 -0700 +Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline + +From: Dongli Zhang + +commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream. + +The absence of IRQD_MOVE_PCNTXT prevents immediate effectiveness of +interrupt affinity reconfiguration via procfs. Instead, the change is +deferred until the next instance of the interrupt being triggered on the +original CPU. + +When the interrupt next triggers on the original CPU, the new affinity is +enforced within __irq_move_irq(). 
A vector is allocated from the new CPU, +but the old vector on the original CPU remains and is not immediately +reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming +process is delayed until the next trigger of the interrupt on the new CPU. + +Upon the subsequent triggering of the interrupt on the new CPU, +irq_complete_move() adds a task to the old CPU's vector_cleanup list if it +remains online. Subsequently, the timer on the old CPU iterates over its +vector_cleanup list, reclaiming old vectors. + +However, a rare scenario arises if the old CPU is outgoing before the +interrupt triggers again on the new CPU. + +In that case irq_force_complete_move() is not invoked on the outgoing CPU +to reclaim the old apicd->prev_vector because the interrupt isn't currently +affine to the outgoing CPU, and irq_needs_fixup() returns false. Even +though __vector_schedule_cleanup() is later called on the new CPU, it +doesn't reclaim apicd->prev_vector; instead, it simply resets both +apicd->move_in_progress and apicd->prev_vector to 0. + +As a result, the vector remains unreclaimed in vector_matrix, leading to a +CPU vector leak. + +To address this issue, move the invocation of irq_force_complete_move() +before the irq_needs_fixup() call to reclaim apicd->prev_vector, if the +interrupt is currently or used to be affine to the outgoing CPU. + +Additionally, reclaim the vector in __vector_schedule_cleanup() as well, +following a warning message, although theoretically it should never see +apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU. 
+ +Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress") +Signed-off-by: Dongli Zhang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/apic/vector.c | 9 ++++++--- + kernel/irq/cpuhotplug.c | 16 ++++++++-------- + 2 files changed, 14 insertions(+), 11 deletions(-) + +--- a/arch/x86/kernel/apic/vector.c ++++ b/arch/x86/kernel/apic/vector.c +@@ -982,7 +982,8 @@ static void __send_cleanup_vector(struct + hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); + apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); + } else { +- apicd->prev_vector = 0; ++ pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu); ++ free_moved_vector(apicd); + } + raw_spin_unlock(&vector_lock); + } +@@ -1019,6 +1020,7 @@ void irq_complete_move(struct irq_cfg *c + */ + void irq_force_complete_move(struct irq_desc *desc) + { ++ unsigned int cpu = smp_processor_id(); + struct apic_chip_data *apicd; + struct irq_data *irqd; + unsigned int vector; +@@ -1043,10 +1045,11 @@ void irq_force_complete_move(struct irq_ + goto unlock; + + /* +- * If prev_vector is empty, no action required. ++ * If prev_vector is empty or the descriptor is neither currently ++ * nor previously on the outgoing CPU no action required. + */ + vector = apicd->prev_vector; +- if (!vector) ++ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu)) + goto unlock; + + /* +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d + } + + /* ++ * Complete an eventually pending irq move cleanup. If this ++ * interrupt was moved in hard irq context, then the vectors need ++ * to be cleaned up. It can't wait until this interrupt actually ++ * happens and this CPU was involved. 
++ */ ++ irq_force_complete_move(desc); ++ ++ /* + * No move required, if: + * - Interrupt is per cpu + * - Interrupt is not started +@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d + } + + /* +- * Complete an eventually pending irq move cleanup. If this +- * interrupt was moved in hard irq context, then the vectors need +- * to be cleaned up. It can't wait until this interrupt actually +- * happens and this CPU was involved. +- */ +- irq_force_complete_move(desc); +- +- /* + * If there is a setaffinity pending, then try to reuse the pending + * mask, so the last change of the affinity does not get lost. If + * there is no move pending or the pending mask does not contain diff --git a/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch b/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch new file mode 100644 index 00000000000..39d8bdc0435 --- /dev/null +++ b/queue-6.1/kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch @@ -0,0 +1,74 @@ +From 6f5c9600621b4efb5c61b482d767432eb1ad3a9c Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 13 Mar 2024 13:58:42 +0100 +Subject: KVM: x86: Don't advertise guest.MAXPHYADDR as host.MAXPHYADDR in CPUID + +From: Gerd Hoffmann + +commit 6f5c9600621b4efb5c61b482d767432eb1ad3a9c upstream. + +Drop KVM's propagation of GuestPhysBits (CPUID leaf 80000008, EAX[23:16]) +to HostPhysBits (same leaf, EAX[7:0]) when advertising the address widths +to userspace via KVM_GET_SUPPORTED_CPUID. + +Per AMD, GuestPhysBits is intended for software use, and physical CPUs do +not set that field. I.e. GuestPhysBits will be non-zero if and only if +KVM is running as a nested hypervisor, and in that case, GuestPhysBits is +NOT guaranteed to capture the CPU's effective MAXPHYADDR when running with +TDP enabled. + +E.g. 
KVM will soon use GuestPhysBits to communicate the CPU's maximum +*addressable* guest physical address, which would result in KVM under- +reporting PhysBits when running as an L1 on a CPU with MAXPHYADDR=52, +but without 5-level paging. + +Signed-off-by: Gerd Hoffmann +Cc: stable@vger.kernel.org +Reviewed-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20240313125844.912415-2-kraxel@redhat.com +[sean: rewrite changelog with --verbose, Cc stable@] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -1157,9 +1157,8 @@ static inline int __do_cpuid_func(struct + entry->eax = entry->ebx = entry->ecx = 0; + break; + case 0x80000008: { +- unsigned g_phys_as = (entry->eax >> 16) & 0xff; +- unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); +- unsigned phys_as = entry->eax & 0xff; ++ unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U); ++ unsigned int phys_as; + + /* + * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as +@@ -1167,16 +1166,16 @@ static inline int __do_cpuid_func(struct + * reductions in MAXPHYADDR for memory encryption affect shadow + * paging, too. + * +- * If TDP is enabled but an explicit guest MAXPHYADDR is not +- * provided, use the raw bare metal MAXPHYADDR as reductions to +- * the HPAs do not affect GPAs. ++ * If TDP is enabled, use the raw bare metal MAXPHYADDR as ++ * reductions to the HPAs do not affect GPAs. 
+ */ +- if (!tdp_enabled) +- g_phys_as = boot_cpu_data.x86_phys_bits; +- else if (!g_phys_as) +- g_phys_as = phys_as; ++ if (!tdp_enabled) { ++ phys_as = boot_cpu_data.x86_phys_bits; ++ } else { ++ phys_as = entry->eax & 0xff; ++ } + +- entry->eax = g_phys_as | (virt_as << 8); ++ entry->eax = phys_as | (virt_as << 8); + entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); + entry->edx = 0; + cpuid_entry_override(entry, CPUID_8000_0008_EBX); diff --git a/queue-6.1/series b/queue-6.1/series index 63bb48a57f0..81204eab3d8 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -44,6 +44,10 @@ regulator-vqmmc-ipq4019-fix-module-autoloading.patch asoc-rt715-add-vendor-clear-control-register.patch asoc-rt715-sdca-volume-step-modification.patch kvm-selftests-add-test-for-uaccesses-to-non-existent.patch +x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch +efi-libstub-only-free-priv.runtime_map-when-allocated.patch +kvm-x86-don-t-advertise-guest.maxphyaddr-as-host.maxphyaddr-in-cpuid.patch +genirq-cpuhotplug-x86-vector-prevent-vector-leak-during-cpu-offline.patch fpga-dfl-pci-add-pci-subdevice-id-for-intel-d5005-ca.patch softirq-fix-suspicious-rcu-usage-in-__do_softirq.patch asoc-da7219-aad-fix-usage-of-device_get_named_child_.patch diff --git a/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch b/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch new file mode 100644 index 00000000000..8d799288e86 --- /dev/null +++ b/queue-6.1/x86-efistub-omit-physical-kaslr-when-memory-reservations-exist.patch @@ -0,0 +1,107 @@ +From 15aa8fb852f995dd234a57f12dfb989044968bb6 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 16 May 2024 11:05:42 +0200 +Subject: x86/efistub: Omit physical KASLR when memory reservations exist + +From: Ard Biesheuvel + +commit 15aa8fb852f995dd234a57f12dfb989044968bb6 upstream. 
+ +The legacy decompressor has elaborate logic to ensure that the +randomized physical placement of the decompressed kernel image does not +conflict with any memory reservations, including ones specified on the +command line using mem=, memmap=, efi_fake_mem= or hugepages=, which are +taken into account by the kernel proper at a later stage. + +When booting in EFI mode, it is the firmware's job to ensure that the +chosen range does not conflict with any memory reservations that it +knows about, and this is trivially achieved by using the firmware's +memory allocation APIs. + +That leaves reservations specified on the command line, though, which +the firmware knows nothing about, as these regions have no other special +significance to the platform. Since commit + + a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") + +these reservations are not taken into account when randomizing the +physical placement, which may result in conflicts where the memory +cannot be reserved by the kernel proper because its own executable image +resides there. + +To avoid having to duplicate or reuse the existing complicated logic, +disable physical KASLR entirely when such overrides are specified. These +are mostly diagnostic tools or niche features, and physical KASLR (as +opposed to virtual KASLR, which is much more important as it affects the +memory addresses observed by code executing in the kernel) is something +we can live without. 
+ +Closes: https://lkml.kernel.org/r/FA5F6719-8824-4B04-803E-82990E65E627%40akamai.com +Reported-by: Ben Chaney +Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") +Cc: stable@vger.kernel.org # v6.1+ +Reviewed-by: Kees Cook +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/efi/libstub/x86-stub.c | 28 ++++++++++++++++++++++++++-- + 1 file changed, 26 insertions(+), 2 deletions(-) + +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -736,6 +736,26 @@ static void error(char *str) + efi_warn("Decompression failed: %s\n", str); + } + ++static const char *cmdline_memmap_override; ++ ++static efi_status_t parse_options(const char *cmdline) ++{ ++ static const char opts[][14] = { ++ "mem=", "memmap=", "efi_fake_mem=", "hugepages=" ++ }; ++ ++ for (int i = 0; i < ARRAY_SIZE(opts); i++) { ++ const char *p = strstr(cmdline, opts[i]); ++ ++ if (p == cmdline || (p > cmdline && isspace(p[-1]))) { ++ cmdline_memmap_override = opts[i]; ++ break; ++ } ++ } ++ ++ return efi_parse_options(cmdline); ++} ++ + static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + { + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; +@@ -767,6 +787,10 @@ static efi_status_t efi_decompress_kerne + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; ++ } else if (cmdline_memmap_override) { ++ efi_info("%s detected on the kernel command line - disabling physical KASLR\n", ++ cmdline_memmap_override); ++ seed[0] = 0; + } + + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; +@@ -843,7 +867,7 @@ void __noreturn efi_stub_entry(efi_handl + } + + #ifdef CONFIG_CMDLINE_BOOL +- status = efi_parse_options(CONFIG_CMDLINE); ++ status = parse_options(CONFIG_CMDLINE); + if (status != EFI_SUCCESS) { + efi_err("Failed to parse options\n"); + goto fail; +@@ -852,7 +876,7 @@ void __noreturn efi_stub_entry(efi_handl + if 
(!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) { + unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr | + ((u64)boot_params->ext_cmd_line_ptr << 32)); +- status = efi_parse_options((char *)cmdline_paddr); ++ status = parse_options((char *)cmdline_paddr); + if (status != EFI_SUCCESS) { + efi_err("Failed to parse options\n"); + goto fail; -- 2.47.3