From db147a94bd62255aab1791b5efa62f84ca57223a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 8 Sep 2024 12:32:28 +0200 Subject: [PATCH] 6.1-stable patches added patches: ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch ksmbd-unset-the-binding-mark-of-a-reused-connection.patch perf-x86-intel-limit-the-period-on-haswell.patch rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch x86-tdx-fix-data-leak-in-mmio_read.patch --- ...eak-for-error-path-in-ata_host_alloc.patch | 41 ++++ ...-fix-refcount-leak-in-gicv2m_of_init.patch | 44 ++++ ...nlock-on-in-ksmbd_tcp_set_interfaces.patch | 35 +++ ...-binding-mark-of-a-reused-connection.patch | 90 +++++++ ...86-intel-limit-the-period-on-haswell.patch | 112 +++++++++ ...rt_mutex-wait_lock-before-scheduling.patch | 63 +++++ queue-6.1/series | 8 + ...of-the-physical-memory-address-space.patch | 226 ++++++++++++++++++ .../x86-tdx-fix-data-leak-in-mmio_read.patch | 43 ++++ 9 files changed, 662 insertions(+) create mode 100644 queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch create mode 100644 queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch create mode 100644 queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch create mode 100644 queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch create mode 100644 queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch create mode 100644 queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch create mode 100644 queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch create mode 100644 queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch diff --git a/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch 
b/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch new file mode 100644 index 00000000000..b669696b35b --- /dev/null +++ b/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch @@ -0,0 +1,41 @@ +From 284b75a3d83c7631586d98f6dede1d90f128f0db Mon Sep 17 00:00:00 2001 +From: Zheng Qixing +Date: Thu, 22 Aug 2024 11:30:50 +0800 +Subject: ata: libata: Fix memory leak for error path in ata_host_alloc() + +From: Zheng Qixing + +commit 284b75a3d83c7631586d98f6dede1d90f128f0db upstream. + +In ata_host_alloc(), if devres_alloc() fails to allocate the device host +resource data pointer, the already allocated ata_host structure is not +freed before returning from the function. This results in a potential +memory leak. + +Call kfree(host) before jumping to the error handling path to ensure +that the ata_host structure is properly freed if devres_alloc() fails. + +Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host") +Cc: stable@vger.kernel.org +Signed-off-by: Zheng Qixing +Reviewed-by: Yu Kuai +Signed-off-by: Damien Le Moal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/libata-core.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -5532,8 +5532,10 @@ struct ata_host *ata_host_alloc(struct d + } + + dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL); +- if (!dr) ++ if (!dr) { ++ kfree(host); + goto err_out; ++ } + + devres_add(dev, dr); + dev_set_drvdata(dev, host); diff --git a/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch b/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch new file mode 100644 index 00000000000..e0d99ca5c6c --- /dev/null +++ b/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch @@ -0,0 +1,44 @@ +From c5af2c90ba5629f0424a8d315f75fb8d91713c3c Mon Sep 17 00:00:00 2001 +From: Ma Ke +Date: Tue, 20 Aug 2024 17:28:43 +0800 +Subject: irqchip/gic-v2m: Fix refcount leak in 
gicv2m_of_init() + +From: Ma Ke + +commit c5af2c90ba5629f0424a8d315f75fb8d91713c3c upstream. + +gicv2m_of_init() fails to perform an of_node_put() when +of_address_to_resource() fails, leading to a refcount leak. + +Address this by moving the error handling path outside of the loop and +making it common to all failure modes. + +Fixes: 4266ab1a8ff5 ("irqchip/gic-v2m: Refactor to prepare for ACPI support") +Signed-off-by: Ma Ke +Signed-off-by: Thomas Gleixner +Reviewed-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240820092843.1219933-1-make24@iscas.ac.cn +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-gic-v2m.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/irqchip/irq-gic-v2m.c ++++ b/drivers/irqchip/irq-gic-v2m.c +@@ -438,12 +438,12 @@ static int __init gicv2m_of_init(struct + + ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, + &res, 0); +- if (ret) { +- of_node_put(child); ++ if (ret) + break; +- } + } + ++ if (ret && child) ++ of_node_put(child); + if (!ret) + ret = gicv2m_allocate_domains(parent); + if (ret) diff --git a/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch b/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch new file mode 100644 index 00000000000..086b07527ba --- /dev/null +++ b/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch @@ -0,0 +1,35 @@ +From 844436e045ac2ab7895d8b281cb784a24de1d14d Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 29 Aug 2024 22:22:35 +0300 +Subject: ksmbd: Unlock on in ksmbd_tcp_set_interfaces() + +From: Dan Carpenter + +commit 844436e045ac2ab7895d8b281cb784a24de1d14d upstream. + +Unlock before returning an error code if this allocation fails. 
+ +Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Dan Carpenter +Acked-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/transport_tcp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/smb/server/transport_tcp.c ++++ b/fs/smb/server/transport_tcp.c +@@ -622,8 +622,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_l + for_each_netdev(&init_net, netdev) { + if (netif_is_bridge_port(netdev)) + continue; +- if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) ++ if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) { ++ rtnl_unlock(); + return -ENOMEM; ++ } + } + rtnl_unlock(); + bind_additional_ifaces = 1; diff --git a/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch b/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch new file mode 100644 index 00000000000..c060fce5fc6 --- /dev/null +++ b/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch @@ -0,0 +1,90 @@ +From 78c5a6f1f630172b19af4912e755e1da93ef0ab5 Mon Sep 17 00:00:00 2001 +From: Namjae Jeon +Date: Tue, 27 Aug 2024 21:44:41 +0900 +Subject: ksmbd: unset the binding mark of a reused connection + +From: Namjae Jeon + +commit 78c5a6f1f630172b19af4912e755e1da93ef0ab5 upstream. + +Steve French reported null pointer dereference error from sha256 lib. +cifs.ko can send session setup requests on reused connection. +If reused connection is used for binding session, conn->binding can +still remain true and generate_preauth_hash() will not set +sess->Preauth_HashValue and it will be NULL. +It is used as a material to create an encryption key in +ksmbd_gen_smb311_encryptionkey. ->Preauth_HashValue cause null pointer +dereference error from crypto_shash_update(). 
+ +BUG: kernel NULL pointer dereference, address: 0000000000000000 +#PF: supervisor read access in kernel mode +#PF: error_code(0x0000) - not-present page +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP PTI +CPU: 8 PID: 429254 Comm: kworker/8:39 +Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS N2CET69W (1.52 ) +Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] +RIP: 0010:lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] + +? show_regs+0x6d/0x80 +? __die+0x24/0x80 +? page_fault_oops+0x99/0x1b0 +? do_user_addr_fault+0x2ee/0x6b0 +? exc_page_fault+0x83/0x1b0 +? asm_exc_page_fault+0x27/0x30 +? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] +? lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] +? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] +? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] +_sha256_update+0x77/0xa0 [sha256_ssse3] +sha256_avx2_update+0x15/0x30 [sha256_ssse3] +crypto_shash_update+0x1e/0x40 +hmac_update+0x12/0x20 +crypto_shash_update+0x1e/0x40 +generate_key+0x234/0x380 [ksmbd] +generate_smb3encryptionkey+0x40/0x1c0 [ksmbd] +ksmbd_gen_smb311_encryptionkey+0x72/0xa0 [ksmbd] +ntlm_authenticate.isra.0+0x423/0x5d0 [ksmbd] +smb2_sess_setup+0x952/0xaa0 [ksmbd] +__process_request+0xa3/0x1d0 [ksmbd] +__handle_ksmbd_work+0x1c4/0x2f0 [ksmbd] +handle_ksmbd_work+0x2d/0xa0 [ksmbd] +process_one_work+0x16c/0x350 +worker_thread+0x306/0x440 +? __pfx_worker_thread+0x10/0x10 +kthread+0xef/0x120 +? __pfx_kthread+0x10/0x10 +ret_from_fork+0x44/0x70 +? 
__pfx_kthread+0x10/0x10 +ret_from_fork_asm+0x1b/0x30 + + +Fixes: f5a544e3bab7 ("ksmbd: add support for SMB3 multichannel") +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/smb2pdu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -1703,6 +1703,8 @@ int smb2_sess_setup(struct ksmbd_work *w + rc = ksmbd_session_register(conn, sess); + if (rc) + goto out_err; ++ ++ conn->binding = false; + } else if (conn->dialect >= SMB30_PROT_ID && + (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) && + req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) { +@@ -1781,6 +1783,8 @@ int smb2_sess_setup(struct ksmbd_work *w + sess = NULL; + goto out_err; + } ++ ++ conn->binding = false; + } + work->sess = sess; + diff --git a/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch b/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch new file mode 100644 index 00000000000..eb4543297dd --- /dev/null +++ b/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch @@ -0,0 +1,112 @@ +From 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 19 Aug 2024 11:30:04 -0700 +Subject: perf/x86/intel: Limit the period on Haswell + +From: Kan Liang + +commit 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b upstream. + +Running the ltp test cve-2015-3290 concurrently reports the following +warnings. + +perfevents: irq loop stuck! + WARNING: CPU: 31 PID: 32438 at arch/x86/events/intel/core.c:3174 + intel_pmu_handle_irq+0x285/0x370 + Call Trace: + + ? __warn+0xa4/0x220 + ? intel_pmu_handle_irq+0x285/0x370 + ? __report_bug+0x123/0x130 + ? intel_pmu_handle_irq+0x285/0x370 + ? __report_bug+0x123/0x130 + ? intel_pmu_handle_irq+0x285/0x370 + ? report_bug+0x3e/0xa0 + ? handle_bug+0x3c/0x70 + ? exc_invalid_op+0x18/0x50 + ? asm_exc_invalid_op+0x1a/0x20 + ? irq_work_claim+0x1e/0x40 + ? 
intel_pmu_handle_irq+0x285/0x370 + perf_event_nmi_handler+0x3d/0x60 + nmi_handle+0x104/0x330 + +Thanks to Thomas Gleixner's analysis, the issue is caused by the low +initial period (1) of the frequency estimation algorithm, which triggers +the defects of the HW, specifically erratum HSW11 and HSW143. (For the +details, please refer to https://lore.kernel.org/lkml/87plq9l5d2.ffs@tglx/) + +The HSW11 requires a period larger than 100 for the INST_RETIRED.ALL +event, but the initial period in the freq mode is 1. The erratum is the +same as the BDM11, which has been supported in the kernel. A minimum +period of 128 is enforced as well on HSW. + +HSW143 is regarding that the fixed counter 1 may overcount 32 when +Hyper-Threading is enabled. However, based on the test, the hardware +has more issues than it tells. Besides the fixed counter 1, the message +'interrupt took too long' can be observed on any counter which was armed +with a period < 32 and two events expired in the same NMI. A minimum +period of 32 is enforced for the rest of the events. +The recommended workaround code of the HSW143 is not implemented +because it only addresses the issue for the fixed counter. It brings +extra overhead through extra MSR writing. No related overcounting issue +has been reported so far. 
+ +Fixes: 3a632cb229bf ("perf/x86/intel: Add simple Haswell PMU support") +Reported-by: Li Huafei +Suggested-by: Thomas Gleixner +Signed-off-by: Kan Liang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240819183004.3132920-1-kan.liang@linux.intel.com +Closes: https://lore.kernel.org/lkml/20240729223328.327835-1-lihuafei1@huawei.com/ +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 23 +++++++++++++++++++++-- + 1 file changed, 21 insertions(+), 2 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -4352,6 +4352,25 @@ static u8 adl_get_hybrid_cpu_type(void) + return hybrid_big; + } + ++static inline bool erratum_hsw11(struct perf_event *event) ++{ ++ return (event->hw.config & INTEL_ARCH_EVENT_MASK) == ++ X86_CONFIG(.event=0xc0, .umask=0x01); ++} ++ ++/* ++ * The HSW11 requires a period larger than 100 which is the same as the BDM11. ++ * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. ++ * ++ * The message 'interrupt took too long' can be observed on any counter which ++ * was armed with a period < 32 and two events expired in the same NMI. ++ * A minimum period of 32 is enforced for the rest of the events. ++ */ ++static void hsw_limit_period(struct perf_event *event, s64 *left) ++{ ++ *left = max(*left, erratum_hsw11(event) ? 
128 : 32); ++} ++ + /* + * Broadwell: + * +@@ -4369,8 +4388,7 @@ static u8 adl_get_hybrid_cpu_type(void) + */ + static void bdw_limit_period(struct perf_event *event, s64 *left) + { +- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == +- X86_CONFIG(.event=0xc0, .umask=0x01)) { ++ if (erratum_hsw11(event)) { + if (*left < 128) + *left = 128; + *left &= ~0x3fULL; +@@ -6180,6 +6198,7 @@ __init int intel_pmu_init(void) + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; ++ x86_pmu.limit_period = hsw_limit_period; + x86_pmu.lbr_double_abort = true; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; diff --git a/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch b/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch new file mode 100644 index 00000000000..49c7c3d2f1c --- /dev/null +++ b/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch @@ -0,0 +1,63 @@ +From d33d26036a0274b472299d7dcdaa5fb34329f91b Mon Sep 17 00:00:00 2001 +From: Roland Xu +Date: Thu, 15 Aug 2024 10:58:13 +0800 +Subject: rtmutex: Drop rt_mutex::wait_lock before scheduling + +From: Roland Xu + +commit d33d26036a0274b472299d7dcdaa5fb34329f91b upstream. + +rt_mutex_handle_deadlock() is called with rt_mutex::wait_lock held. In the +good case it returns with the lock held and in the deadlock case it emits a +warning and goes into an endless scheduling loop with the lock held, which +triggers the 'scheduling in atomic' warning. + +Unlock rt_mutex::wait_lock in the dead lock case before issuing the warning +and dropping into the schedule for ever loop. 
+ +[ tglx: Moved unlock before the WARN(), removed the pointless comment, + massaged changelog, added Fixes tag ] + +Fixes: 3d5c9340d194 ("rtmutex: Handle deadlock detection smarter") +Signed-off-by: Roland Xu +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/ME0P300MB063599BEF0743B8FA339C2CECC802@ME0P300MB0635.AUSP300.PROD.OUTLOOK.COM +Signed-off-by: Greg Kroah-Hartman +--- + kernel/locking/rtmutex.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1624,6 +1624,7 @@ static int __sched rt_mutex_slowlock_blo + } + + static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, ++ struct rt_mutex_base *lock, + struct rt_mutex_waiter *w) + { + /* +@@ -1636,10 +1637,10 @@ static void __sched rt_mutex_handle_dead + if (build_ww_mutex() && w->ww_ctx) + return; + +- /* +- * Yell loudly and stop the task right here. +- */ ++ raw_spin_unlock_irq(&lock->wait_lock); ++ + WARN(1, "rtmutex deadlock detected\n"); ++ + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); +@@ -1693,7 +1694,7 @@ static int __sched __rt_mutex_slowlock(s + } else { + __set_current_state(TASK_RUNNING); + remove_waiter(lock, waiter); +- rt_mutex_handle_deadlock(ret, chwalk, waiter); ++ rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); + } + + /* diff --git a/queue-6.1/series b/queue-6.1/series index e8a0497f114..f919ab86137 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -6,3 +6,11 @@ kvm-svm-don-t-advertise-bus-lock-detect-to-guest-if-svm-support-is-missing.patch alsa-hda-conexant-add-pincfg-quirk-to-enable-top-speakers-on-sirius-devices.patch alsa-hda-realtek-add-patch-for-internal-mic-in-lenovo-v145.patch alsa-hda-realtek-support-mute-led-on-hp-laptop-14-dq2xxx.patch +ksmbd-unset-the-binding-mark-of-a-reused-connection.patch +ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch 
+ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch +x86-tdx-fix-data-leak-in-mmio_read.patch +perf-x86-intel-limit-the-period-on-haswell.patch +irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch +x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch +rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch diff --git a/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch b/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch new file mode 100644 index 00000000000..11b272a8104 --- /dev/null +++ b/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch @@ -0,0 +1,226 @@ +From ea72ce5da22806d5713f3ffb39a6d5ae73841f93 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 14 Aug 2024 00:29:36 +0200 +Subject: x86/kaslr: Expose and use the end of the physical memory address space + +From: Thomas Gleixner + +commit ea72ce5da22806d5713f3ffb39a6d5ae73841f93 upstream. + +iounmap() on x86 occasionally fails to unmap because the provided valid +ioremap address is not below high_memory. It turned out that this +happens due to KASLR. + +KASLR uses the full address space between PAGE_OFFSET and vaddr_end to +randomize the starting points of the direct map, vmalloc and vmemmap +regions. It thereby limits the size of the direct map by using the +installed memory size plus an extra configurable margin for hot-plug +memory. This limitation is done to gain more randomization space +because otherwise only the holes between the direct map, vmalloc, +vmemmap and vaddr_end would be usable for randomizing. + +The limited direct map size is not exposed to the rest of the kernel, so +the memory hot-plug and resource management related code paths still +operate under the assumption that the available address space can be +determined with MAX_PHYSMEM_BITS. + +request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 +downwards. 
That means the first allocation happens past the end of the +direct map and if unlucky this address is in the vmalloc space, which +causes high_memory to become greater than VMALLOC_START and consequently +causes iounmap() to fail for valid ioremap addresses. + +MAX_PHYSMEM_BITS cannot be changed for that because the randomization +does not align with address bit boundaries and there are other places +which actually require to know the maximum number of address bits. All +remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found +to be correct. + +Cure this by exposing the end of the direct map via PHYSMEM_END and use +that for the memory hot-plug and resource management related places +instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END +maps to a variable which is initialized by the KASLR initialization and +otherwise it is based on MAX_PHYSMEM_BITS as before. + +To prevent future hiccups add a check into add_pages() to catch callers +trying to add memory above PHYSMEM_END. 
+ +Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions") +Reported-by: Max Ramanouski +Reported-by: Alistair Popple +Signed-off-by: Thomas Gleixner +Tested-By: Max Ramanouski +Tested-by: Alistair Popple +Reviewed-by: Dan Williams +Reviewed-by: Alistair Popple +Reviewed-by: Kees Cook +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/page_64.h | 1 + + arch/x86/include/asm/pgtable_64_types.h | 4 ++++ + arch/x86/mm/init_64.c | 4 ++++ + arch/x86/mm/kaslr.c | 32 ++++++++++++++++++++++++++------ + include/linux/mm.h | 4 ++++ + kernel/resource.c | 6 ++---- + mm/memory_hotplug.c | 2 +- + mm/sparse.c | 2 +- + 8 files changed, 43 insertions(+), 12 deletions(-) + +--- a/arch/x86/include/asm/page_64.h ++++ b/arch/x86/include/asm/page_64.h +@@ -17,6 +17,7 @@ extern unsigned long phys_base; + extern unsigned long page_offset_base; + extern unsigned long vmalloc_base; + extern unsigned long vmemmap_base; ++extern unsigned long physmem_end; + + static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) + { +--- a/arch/x86/include/asm/pgtable_64_types.h ++++ b/arch/x86/include/asm/pgtable_64_types.h +@@ -139,6 +139,10 @@ extern unsigned int ptrs_per_p4d; + # define VMEMMAP_START __VMEMMAP_BASE_L4 + #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ + ++#ifdef CONFIG_RANDOMIZE_MEMORY ++# define PHYSMEM_END physmem_end ++#endif ++ + /* + * End of the region for which vmalloc page tables are pre-allocated. + * For non-KMSAN builds, this is the same as VMALLOC_END. 
+--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -950,8 +950,12 @@ static void update_end_of_memory_vars(u6 + int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct mhp_params *params) + { ++ unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1; + int ret; + ++ if (WARN_ON_ONCE(end > PHYSMEM_END)) ++ return -ERANGE; ++ + ret = __add_pages(nid, start_pfn, nr_pages, params); + WARN_ON_ONCE(ret); + +--- a/arch/x86/mm/kaslr.c ++++ b/arch/x86/mm/kaslr.c +@@ -47,13 +47,24 @@ static const unsigned long vaddr_end = C + */ + static __initdata struct kaslr_memory_region { + unsigned long *base; ++ unsigned long *end; + unsigned long size_tb; + } kaslr_regions[] = { +- { &page_offset_base, 0 }, +- { &vmalloc_base, 0 }, +- { &vmemmap_base, 0 }, ++ { ++ .base = &page_offset_base, ++ .end = &physmem_end, ++ }, ++ { ++ .base = &vmalloc_base, ++ }, ++ { ++ .base = &vmemmap_base, ++ }, + }; + ++/* The end of the possible address space for physical memory */ ++unsigned long physmem_end __ro_after_init; ++ + /* Get size in bytes used by the memory region */ + static inline unsigned long get_padding(struct kaslr_memory_region *region) + { +@@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void + BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); + BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + ++ /* Preset the end of the possible address space for physical memory */ ++ physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); + if (!kaslr_memory_enabled()) + return; + +@@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void + vaddr += entropy; + *kaslr_regions[i].base = vaddr; + ++ /* Calculate the end of the region */ ++ vaddr += get_padding(&kaslr_regions[i]); + /* +- * Jump the region and add a minimum padding based on +- * randomization alignment. ++ * KASLR trims the maximum possible size of the ++ * direct-map. Update the physmem_end boundary. 
++ * No rounding required as the region starts ++ * PUD aligned and size is in units of TB. + */ +- vaddr += get_padding(&kaslr_regions[i]); ++ if (kaslr_regions[i].end) ++ *kaslr_regions[i].end = __pa_nodebug(vaddr - 1); ++ ++ /* Add a minimum padding based on randomization alignment. */ + vaddr = round_up(vaddr + 1, PUD_SIZE); + remain_entropy -= entropy; + } +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -92,6 +92,10 @@ extern const int mmap_rnd_compat_bits_ma + extern int mmap_rnd_compat_bits __read_mostly; + #endif + ++#ifndef PHYSMEM_END ++# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) ++#endif ++ + #include + #include + +--- a/kernel/resource.c ++++ b/kernel/resource.c +@@ -1781,8 +1781,7 @@ static resource_size_t gfr_start(struct + if (flags & GFR_DESCENDING) { + resource_size_t end; + +- end = min_t(resource_size_t, base->end, +- (1ULL << MAX_PHYSMEM_BITS) - 1); ++ end = min_t(resource_size_t, base->end, PHYSMEM_END); + return end - size + 1; + } + +@@ -1799,8 +1798,7 @@ static bool gfr_continue(struct resource + * @size did not wrap 0. 
+ */ + return addr > addr - size && +- addr <= min_t(resource_size_t, base->end, +- (1ULL << MAX_PHYSMEM_BITS) - 1); ++ addr <= min_t(resource_size_t, base->end, PHYSMEM_END); + } + + static resource_size_t gfr_next(resource_size_t addr, resource_size_t size, +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1530,7 +1530,7 @@ struct range __weak arch_get_mappable_ra + + struct range mhp_get_pluggable_range(bool need_mapping) + { +- const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1; ++ const u64 max_phys = PHYSMEM_END; + struct range mhp_range; + + if (need_mapping) { +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -129,7 +129,7 @@ static inline int sparse_early_nid(struc + static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, + unsigned long *end_pfn) + { +- unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); ++ unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; + + /* + * Sanity checks - do not allow an architecture to pass diff --git a/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch b/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch new file mode 100644 index 00000000000..9dd43b8698c --- /dev/null +++ b/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch @@ -0,0 +1,43 @@ +From b6fb565a2d15277896583d471b21bc14a0c99661 Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Mon, 26 Aug 2024 15:53:04 +0300 +Subject: x86/tdx: Fix data leak in mmio_read() + +From: Kirill A. Shutemov + +commit b6fb565a2d15277896583d471b21bc14a0c99661 upstream. + +The mmio_read() function makes a TDVMCALL to retrieve MMIO data for an +address from the VMM. + +Sean noticed that mmio_read() unintentionally exposes the value of an +initialized variable (val) on the stack to the VMM. + +This variable is only needed as an output value. It did not need to be +passed to the VMM in the first place. + +Do not send the original value of *val to the VMM. + +[ dhansen: clarify what 'val' is used for. 
] + +Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") +Reported-by: Sean Christopherson +Signed-off-by: Kirill A. Shutemov +Signed-off-by: Dave Hansen +Cc:stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240826125304.1566719-1-kirill.shutemov%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/coco/tdx/tdx.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/x86/coco/tdx/tdx.c ++++ b/arch/x86/coco/tdx/tdx.c +@@ -328,7 +328,6 @@ static bool mmio_read(int size, unsigned + .r12 = size, + .r13 = EPT_READ, + .r14 = addr, +- .r15 = *val, + }; + + if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT)) -- 2.47.3