6.1-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 8 Sep 2024 10:32:28 +0000 (12:32 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sun, 8 Sep 2024 10:32:28 +0000 (12:32 +0200)
added patches:
ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch
irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch
ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch
ksmbd-unset-the-binding-mark-of-a-reused-connection.patch
perf-x86-intel-limit-the-period-on-haswell.patch
rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch
x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch
x86-tdx-fix-data-leak-in-mmio_read.patch

queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch [new file with mode: 0644]
queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch [new file with mode: 0644]
queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch [new file with mode: 0644]
queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch [new file with mode: 0644]
queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch [new file with mode: 0644]
queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch [new file with mode: 0644]
queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch [new file with mode: 0644]

diff --git a/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch b/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch
new file mode 100644
index 0000000..b669696
--- /dev/null
+++ b/queue-6.1/ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch
@@ -0,0 +1,41 @@
+From 284b75a3d83c7631586d98f6dede1d90f128f0db Mon Sep 17 00:00:00 2001
+From: Zheng Qixing <zhengqixing@huawei.com>
+Date: Thu, 22 Aug 2024 11:30:50 +0800
+Subject: ata: libata: Fix memory leak for error path in ata_host_alloc()
+
+From: Zheng Qixing <zhengqixing@huawei.com>
+
+commit 284b75a3d83c7631586d98f6dede1d90f128f0db upstream.
+
+In ata_host_alloc(), if devres_alloc() fails to allocate the device host
+resource data pointer, the already allocated ata_host structure is not
+freed before returning from the function. This results in a potential
+memory leak.
+
+Call kfree(host) before jumping to the error handling path to ensure
+that the ata_host structure is properly freed if devres_alloc() fails.
+
+Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-core.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5532,8 +5532,10 @@ struct ata_host *ata_host_alloc(struct d
+       }
+       dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL);
+-      if (!dr)
++      if (!dr) {
++              kfree(host);
+               goto err_out;
++      }
+       devres_add(dev, dr);
+       dev_set_drvdata(dev, host);
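
For reference, a condensed sketch of the fixed allocation path. It is
paraphrased from the hunk above plus its surrounding upstream context;
the kzalloc() call is simplified and stands in for code the hunk does
not show:

        struct ata_host *host;
        void *dr;

        host = kzalloc(sizeof(*host), GFP_KERNEL);      /* simplified */
        if (!host)
                return NULL;

        dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL);
        if (!dr) {
                /* the fix: devres never took ownership of host, so it
                 * must be freed here to avoid the leak */
                kfree(host);
                goto err_out;
        }

        devres_add(dev, dr);
        dev_set_drvdata(dev, host);
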
diff --git a/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch b/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch
new file mode 100644
index 0000000..e0d99ca
--- /dev/null
+++ b/queue-6.1/irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch
@@ -0,0 +1,44 @@
+From c5af2c90ba5629f0424a8d315f75fb8d91713c3c Mon Sep 17 00:00:00 2001
+From: Ma Ke <make24@iscas.ac.cn>
+Date: Tue, 20 Aug 2024 17:28:43 +0800
+Subject: irqchip/gic-v2m: Fix refcount leak in gicv2m_of_init()
+
+From: Ma Ke <make24@iscas.ac.cn>
+
+commit c5af2c90ba5629f0424a8d315f75fb8d91713c3c upstream.
+
+gicv2m_of_init() fails to perform an of_node_put() when
+of_address_to_resource() fails, leading to a refcount leak.
+
+Address this by moving the error handling path outside of the loop and
+making it common to all failure modes.
+
+Fixes: 4266ab1a8ff5 ("irqchip/gic-v2m: Refactor to prepare for ACPI support")
+Signed-off-by: Ma Ke <make24@iscas.ac.cn>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240820092843.1219933-1-make24@iscas.ac.cn
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/irqchip/irq-gic-v2m.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/irqchip/irq-gic-v2m.c
++++ b/drivers/irqchip/irq-gic-v2m.c
+@@ -438,12 +438,12 @@ static int __init gicv2m_of_init(struct
+               ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
+                                     &res, 0);
+-              if (ret) {
+-                      of_node_put(child);
++              if (ret)
+                       break;
+-              }
+       }
++      if (ret && child)
++              of_node_put(child);
+       if (!ret)
+               ret = gicv2m_allocate_domains(parent);
+       if (ret)
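
For reference, a sketch of the refcounting pattern the fix restores
(simplified; not the verbatim upstream function). The call arguments
are taken from the hunk above, the rest is illustrative:

        struct device_node *child;
        int ret = 0;

        for_each_child_of_node(node, child) {
                /* for_each_child_of_node() holds a reference on 'child'
                 * during each iteration; breaking out early keeps it */
                ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
                                      &res, 0);
                if (ret)
                        break;
        }

        /* a single common error path drops the leftover reference
         * exactly once, whichever failure mode (including the failing
         * of_address_to_resource() inside gicv2m_init_one()) got here */
        if (ret && child)
                of_node_put(child);
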
diff --git a/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch b/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch
new file mode 100644
index 0000000..086b075
--- /dev/null
+++ b/queue-6.1/ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch
@@ -0,0 +1,35 @@
+From 844436e045ac2ab7895d8b281cb784a24de1d14d Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Thu, 29 Aug 2024 22:22:35 +0300
+Subject: ksmbd: Unlock on error in ksmbd_tcp_set_interfaces()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit 844436e045ac2ab7895d8b281cb784a24de1d14d upstream.
+
+Unlock the RTNL mutex before returning -ENOMEM when the interface
+allocation fails.
+
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Cc: stable@vger.kernel.org # v5.15+
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/transport_tcp.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/smb/server/transport_tcp.c
++++ b/fs/smb/server/transport_tcp.c
+@@ -622,8 +622,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_l
+               for_each_netdev(&init_net, netdev) {
+                       if (netif_is_bridge_port(netdev))
+                               continue;
+-                      if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL)))
++                      if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) {
++                              rtnl_unlock();
+                               return -ENOMEM;
++                      }
+               }
+               rtnl_unlock();
+               bind_additional_ifaces = 1;
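
Condensed from the hunk above, the locking rule the fix enforces:
every exit between rtnl_lock() and rtnl_unlock() must release the RTNL
mutex first. The rtnl_lock() call is implied context not shown in the
hunk:

        rtnl_lock();
        for_each_netdev(&init_net, netdev) {
                if (netif_is_bridge_port(netdev))
                        continue;
                if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) {
                        rtnl_unlock();  /* the fix: never return with RTNL held */
                        return -ENOMEM;
                }
        }
        rtnl_unlock();
        bind_additional_ifaces = 1;
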
diff --git a/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch b/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch
new file mode 100644
index 0000000..c060fce
--- /dev/null
+++ b/queue-6.1/ksmbd-unset-the-binding-mark-of-a-reused-connection.patch
@@ -0,0 +1,90 @@
+From 78c5a6f1f630172b19af4912e755e1da93ef0ab5 Mon Sep 17 00:00:00 2001
+From: Namjae Jeon <linkinjeon@kernel.org>
+Date: Tue, 27 Aug 2024 21:44:41 +0900
+Subject: ksmbd: unset the binding mark of a reused connection
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+commit 78c5a6f1f630172b19af4912e755e1da93ef0ab5 upstream.
+
+Steve French reported a NULL pointer dereference in the sha256 library.
+cifs.ko can send session setup requests on a reused connection. If the
+reused connection was previously used for session binding, conn->binding
+can still be true, in which case generate_preauth_hash() does not set
+sess->Preauth_HashValue and it stays NULL. That value is used as key
+material in ksmbd_gen_smb311_encryptionkey(), so the NULL
+->Preauth_HashValue causes a NULL pointer dereference in
+crypto_shash_update().
+
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 8 PID: 429254 Comm: kworker/8:39
+Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS N2CET69W (1.52 )
+Workqueue: ksmbd-io handle_ksmbd_work [ksmbd]
+RIP: 0010:lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3]
+<TASK>
+? show_regs+0x6d/0x80
+? __die+0x24/0x80
+? page_fault_oops+0x99/0x1b0
+? do_user_addr_fault+0x2ee/0x6b0
+? exc_page_fault+0x83/0x1b0
+? asm_exc_page_fault+0x27/0x30
+? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3]
+? lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3]
+? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3]
+? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3]
+_sha256_update+0x77/0xa0 [sha256_ssse3]
+sha256_avx2_update+0x15/0x30 [sha256_ssse3]
+crypto_shash_update+0x1e/0x40
+hmac_update+0x12/0x20
+crypto_shash_update+0x1e/0x40
+generate_key+0x234/0x380 [ksmbd]
+generate_smb3encryptionkey+0x40/0x1c0 [ksmbd]
+ksmbd_gen_smb311_encryptionkey+0x72/0xa0 [ksmbd]
+ntlm_authenticate.isra.0+0x423/0x5d0 [ksmbd]
+smb2_sess_setup+0x952/0xaa0 [ksmbd]
+__process_request+0xa3/0x1d0 [ksmbd]
+__handle_ksmbd_work+0x1c4/0x2f0 [ksmbd]
+handle_ksmbd_work+0x2d/0xa0 [ksmbd]
+process_one_work+0x16c/0x350
+worker_thread+0x306/0x440
+? __pfx_worker_thread+0x10/0x10
+kthread+0xef/0x120
+? __pfx_kthread+0x10/0x10
+ret_from_fork+0x44/0x70
+? __pfx_kthread+0x10/0x10
+ret_from_fork_asm+0x1b/0x30
+</TASK>
+
+Fixes: f5a544e3bab7 ("ksmbd: add support for SMB3 multichannel")
+Cc: stable@vger.kernel.org # v5.15+
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/smb2pdu.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/smb/server/smb2pdu.c
++++ b/fs/smb/server/smb2pdu.c
+@@ -1703,6 +1703,8 @@ int smb2_sess_setup(struct ksmbd_work *w
+               rc = ksmbd_session_register(conn, sess);
+               if (rc)
+                       goto out_err;
++
++              conn->binding = false;
+       } else if (conn->dialect >= SMB30_PROT_ID &&
+                  (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+                  req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
+@@ -1781,6 +1783,8 @@ int smb2_sess_setup(struct ksmbd_work *w
+                       sess = NULL;
+                       goto out_err;
+               }
++
++              conn->binding = false;
+       }
+       work->sess = sess;
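
Condensed from the two hunks above (sketch, not verbatim source): each
session setup path on a reused connection now clears the binding mark
before the session is used, so a later generate_preauth_hash() computes
and stores sess->Preauth_HashValue instead of skipping it:

        rc = ksmbd_session_register(conn, sess);
        if (rc)
                goto out_err;

        /* the fix: this connection now carries a regular session
         * setup, so it is no longer a binding channel */
        conn->binding = false;
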
diff --git a/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch b/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch
new file mode 100644
index 0000000..eb45432
--- /dev/null
+++ b/queue-6.1/perf-x86-intel-limit-the-period-on-haswell.patch
@@ -0,0 +1,112 @@
+From 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 19 Aug 2024 11:30:04 -0700
+Subject: perf/x86/intel: Limit the period on Haswell
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b upstream.
+
+Running the ltp test cve-2015-3290 concurrently reports the following
+warnings.
+
+perfevents: irq loop stuck!
+  WARNING: CPU: 31 PID: 32438 at arch/x86/events/intel/core.c:3174
+  intel_pmu_handle_irq+0x285/0x370
+  Call Trace:
+   <NMI>
+   ? __warn+0xa4/0x220
+   ? intel_pmu_handle_irq+0x285/0x370
+   ? __report_bug+0x123/0x130
+   ? intel_pmu_handle_irq+0x285/0x370
+   ? __report_bug+0x123/0x130
+   ? intel_pmu_handle_irq+0x285/0x370
+   ? report_bug+0x3e/0xa0
+   ? handle_bug+0x3c/0x70
+   ? exc_invalid_op+0x18/0x50
+   ? asm_exc_invalid_op+0x1a/0x20
+   ? irq_work_claim+0x1e/0x40
+   ? intel_pmu_handle_irq+0x285/0x370
+   perf_event_nmi_handler+0x3d/0x60
+   nmi_handle+0x104/0x330
+
+Thanks to Thomas Gleixner's analysis, the issue is caused by the low
+initial period (1) of the frequency estimation algorithm, which
+triggers hardware defects, specifically errata HSW11 and HSW143. (For
+the details, please refer to
+https://lore.kernel.org/lkml/87plq9l5d2.ffs@tglx/)
+
+HSW11 requires a period larger than 100 for the INST_RETIRED.ALL
+event, but the initial period in freq mode is 1. The erratum is the
+same as BDM11, which the kernel already handles, so the same minimum
+period of 128 is enforced on Haswell as well.
+
+HSW143 concerns fixed counter 1, which may overcount by 32 when
+Hyper-Threading is enabled. However, testing shows the hardware has
+more issues than the erratum describes: besides fixed counter 1, the
+message 'interrupt took too long' can be observed on any counter that
+was armed with a period < 32 when two events expired in the same NMI.
+A minimum period of 32 is therefore enforced for the rest of the
+events.
+
+The workaround recommended for HSW143 is not implemented, because it
+only addresses the fixed counter and brings extra overhead through
+extra MSR writes. No related overcounting issue has been reported so
+far.
+
+Fixes: 3a632cb229bf ("perf/x86/intel: Add simple Haswell PMU support")
+Reported-by: Li Huafei <lihuafei1@huawei.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240819183004.3132920-1-kan.liang@linux.intel.com
+Closes: https://lore.kernel.org/lkml/20240729223328.327835-1-lihuafei1@huawei.com/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |   23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -4352,6 +4352,25 @@ static u8 adl_get_hybrid_cpu_type(void)
+       return hybrid_big;
+ }
++static inline bool erratum_hsw11(struct perf_event *event)
++{
++      return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
++              X86_CONFIG(.event=0xc0, .umask=0x01);
++}
++
++/*
++ * The HSW11 requires a period larger than 100 which is the same as the BDM11.
++ * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
++ *
++ * The message 'interrupt took too long' can be observed on any counter which
++ * was armed with a period < 32 and two events expired in the same NMI.
++ * A minimum period of 32 is enforced for the rest of the events.
++ */
++static void hsw_limit_period(struct perf_event *event, s64 *left)
++{
++      *left = max(*left, erratum_hsw11(event) ? 128 : 32);
++}
++
+ /*
+  * Broadwell:
+  *
+@@ -4369,8 +4388,7 @@ static u8 adl_get_hybrid_cpu_type(void)
+  */
+ static void bdw_limit_period(struct perf_event *event, s64 *left)
+ {
+-      if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+-                      X86_CONFIG(.event=0xc0, .umask=0x01)) {
++      if (erratum_hsw11(event)) {
+               if (*left < 128)
+                       *left = 128;
+               *left &= ~0x3fULL;
+@@ -6180,6 +6198,7 @@ __init int intel_pmu_init(void)
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
++              x86_pmu.limit_period = hsw_limit_period;
+               x86_pmu.lbr_double_abort = true;
+               extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+                       hsw_format_attr : nhm_format_attr;
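
For context, a sketch of how the new callback takes effect. The call
site below is paraphrased from the upstream x86 PMU core (it is not
part of this patch) and should be read as assumed surrounding code:

        /* roughly what x86_perf_event_set_period() does before
         * writing the counter */
        s64 left = local64_read(&hwc->period_left);

        if (x86_pmu.limit_period)
                x86_pmu.limit_period(event, &left);

        /* on Haswell this clamps 'left' to at least 128 for
         * INST_RETIRED.ALL (HSW11) and at least 32 for everything
         * else (HSW143), so freq mode can no longer arm a counter
         * with the pathological initial period of 1 */
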
diff --git a/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch b/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch
new file mode 100644
index 0000000..49c7c3d
--- /dev/null
+++ b/queue-6.1/rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch
@@ -0,0 +1,63 @@
+From d33d26036a0274b472299d7dcdaa5fb34329f91b Mon Sep 17 00:00:00 2001
+From: Roland Xu <mu001999@outlook.com>
+Date: Thu, 15 Aug 2024 10:58:13 +0800
+Subject: rtmutex: Drop rt_mutex::wait_lock before scheduling
+
+From: Roland Xu <mu001999@outlook.com>
+
+commit d33d26036a0274b472299d7dcdaa5fb34329f91b upstream.
+
+rt_mutex_handle_deadlock() is called with rt_mutex::wait_lock held.  In the
+good case it returns with the lock held and in the deadlock case it emits a
+warning and goes into an endless scheduling loop with the lock held, which
+triggers the 'scheduling in atomic' warning.
+
+Unlock rt_mutex::wait_lock in the deadlock case before issuing the warning
+and dropping into the schedule-forever loop.
+
+[ tglx: Moved unlock before the WARN(), removed the pointless comment,
+       massaged changelog, added Fixes tag ]
+
+Fixes: 3d5c9340d194 ("rtmutex: Handle deadlock detection smarter")
+Signed-off-by: Roland Xu <mu001999@outlook.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/ME0P300MB063599BEF0743B8FA339C2CECC802@ME0P300MB0635.AUSP300.PROD.OUTLOOK.COM
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/locking/rtmutex.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1624,6 +1624,7 @@ static int __sched rt_mutex_slowlock_blo
+ }
+ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
++                                           struct rt_mutex_base *lock,
+                                            struct rt_mutex_waiter *w)
+ {
+       /*
+@@ -1636,10 +1637,10 @@ static void __sched rt_mutex_handle_dead
+       if (build_ww_mutex() && w->ww_ctx)
+               return;
+-      /*
+-       * Yell loudly and stop the task right here.
+-       */
++      raw_spin_unlock_irq(&lock->wait_lock);
++
+       WARN(1, "rtmutex deadlock detected\n");
++
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
+@@ -1693,7 +1694,7 @@ static int __sched __rt_mutex_slowlock(s
+       } else {
+               __set_current_state(TASK_RUNNING);
+               remove_waiter(lock, waiter);
+-              rt_mutex_handle_deadlock(ret, chwalk, waiter);
++              rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
+       }
+       /*
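
The net effect of the hunks above, condensed (sketch, not verbatim
source): the deadlock path drops the wait_lock before parking the
task, so the endless loop no longer calls schedule() with a raw
spinlock held:

        raw_spin_unlock_irq(&lock->wait_lock);  /* the fix */

        WARN(1, "rtmutex deadlock detected\n");

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();     /* no longer scheduling while atomic */
        }
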
diff --git a/queue-6.1/series b/queue-6.1/series
index e8a0497f114065a639aad48218430789a0a55eaa..f919ab861377226adc617fb158c0c3ebd0a1d73e 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -6,3 +6,11 @@ kvm-svm-don-t-advertise-bus-lock-detect-to-guest-if-svm-support-is-missing.patch
 alsa-hda-conexant-add-pincfg-quirk-to-enable-top-speakers-on-sirius-devices.patch
 alsa-hda-realtek-add-patch-for-internal-mic-in-lenovo-v145.patch
 alsa-hda-realtek-support-mute-led-on-hp-laptop-14-dq2xxx.patch
+ksmbd-unset-the-binding-mark-of-a-reused-connection.patch
+ksmbd-unlock-on-in-ksmbd_tcp_set_interfaces.patch
+ata-libata-fix-memory-leak-for-error-path-in-ata_host_alloc.patch
+x86-tdx-fix-data-leak-in-mmio_read.patch
+perf-x86-intel-limit-the-period-on-haswell.patch
+irqchip-gic-v2m-fix-refcount-leak-in-gicv2m_of_init.patch
+x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch
+rtmutex-drop-rt_mutex-wait_lock-before-scheduling.patch
diff --git a/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch b/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch
new file mode 100644
index 0000000..11b272a
--- /dev/null
+++ b/queue-6.1/x86-kaslr-expose-and-use-the-end-of-the-physical-memory-address-space.patch
@@ -0,0 +1,226 @@
+From ea72ce5da22806d5713f3ffb39a6d5ae73841f93 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 14 Aug 2024 00:29:36 +0200
+Subject: x86/kaslr: Expose and use the end of the physical memory address space
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ea72ce5da22806d5713f3ffb39a6d5ae73841f93 upstream.
+
+iounmap() on x86 occasionally fails to unmap because the provided valid
+ioremap address is not below high_memory. It turned out that this
+happens due to KASLR.
+
+KASLR uses the full address space between PAGE_OFFSET and vaddr_end to
+randomize the starting points of the direct map, vmalloc and vmemmap
+regions.  It thereby limits the size of the direct map by using the
+installed memory size plus an extra configurable margin for hot-plug
+memory.  This limitation is done to gain more randomization space
+because otherwise only the holes between the direct map, vmalloc,
+vmemmap and vaddr_end would be usable for randomizing.
+
+The limited direct map size is not exposed to the rest of the kernel, so
+the memory hot-plug and resource management related code paths still
+operate under the assumption that the available address space can be
+determined with MAX_PHYSMEM_BITS.
+
+request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1
+downwards.  That means the first allocation happens past the end of the
+direct map and if unlucky this address is in the vmalloc space, which
+causes high_memory to become greater than VMALLOC_START and consequently
+causes iounmap() to fail for valid ioremap addresses.
+
+MAX_PHYSMEM_BITS cannot be changed for that because the randomization
+does not align with address bit boundaries and there are other places
+which actually require to know the maximum number of address bits.  All
+remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found
+to be correct.
+
+Cure this by exposing the end of the direct map via PHYSMEM_END and use
+that for the memory hot-plug and resource management related places
+instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END
+maps to a variable which is initialized by the KASLR initialization and
+otherwise it is based on MAX_PHYSMEM_BITS as before.
+
+To prevent future hiccups, add a check to add_pages() to catch callers
+trying to add memory above PHYSMEM_END.
+
+Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions")
+Reported-by: Max Ramanouski <max8rr8@gmail.com>
+Reported-by: Alistair Popple <apopple@nvidia.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-By: Max Ramanouski <max8rr8@gmail.com>
+Tested-by: Alistair Popple <apopple@nvidia.com>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Reviewed-by: Alistair Popple <apopple@nvidia.com>
+Reviewed-by: Kees Cook <kees@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/page_64.h          |    1 +
+ arch/x86/include/asm/pgtable_64_types.h |    4 ++++
+ arch/x86/mm/init_64.c                   |    4 ++++
+ arch/x86/mm/kaslr.c                     |   32 ++++++++++++++++++++++++++------
+ include/linux/mm.h                      |    4 ++++
+ kernel/resource.c                       |    6 ++----
+ mm/memory_hotplug.c                     |    2 +-
+ mm/sparse.c                             |    2 +-
+ 8 files changed, 43 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/include/asm/page_64.h
++++ b/arch/x86/include/asm/page_64.h
+@@ -17,6 +17,7 @@ extern unsigned long phys_base;
+ extern unsigned long page_offset_base;
+ extern unsigned long vmalloc_base;
+ extern unsigned long vmemmap_base;
++extern unsigned long physmem_end;
+ static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
+ {
+--- a/arch/x86/include/asm/pgtable_64_types.h
++++ b/arch/x86/include/asm/pgtable_64_types.h
+@@ -139,6 +139,10 @@ extern unsigned int ptrs_per_p4d;
+ # define VMEMMAP_START                __VMEMMAP_BASE_L4
+ #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
++#ifdef CONFIG_RANDOMIZE_MEMORY
++# define PHYSMEM_END          physmem_end
++#endif
++
+ /*
+  * End of the region for which vmalloc page tables are pre-allocated.
+  * For non-KMSAN builds, this is the same as VMALLOC_END.
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -950,8 +950,12 @@ static void update_end_of_memory_vars(u6
+ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+             struct mhp_params *params)
+ {
++      unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
+       int ret;
++      if (WARN_ON_ONCE(end > PHYSMEM_END))
++              return -ERANGE;
++
+       ret = __add_pages(nid, start_pfn, nr_pages, params);
+       WARN_ON_ONCE(ret);
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -47,13 +47,24 @@ static const unsigned long vaddr_end = C
+  */
+ static __initdata struct kaslr_memory_region {
+       unsigned long *base;
++      unsigned long *end;
+       unsigned long size_tb;
+ } kaslr_regions[] = {
+-      { &page_offset_base, 0 },
+-      { &vmalloc_base, 0 },
+-      { &vmemmap_base, 0 },
++      {
++              .base   = &page_offset_base,
++              .end    = &physmem_end,
++      },
++      {
++              .base   = &vmalloc_base,
++      },
++      {
++              .base   = &vmemmap_base,
++      },
+ };
++/* The end of the possible address space for physical memory */
++unsigned long physmem_end __ro_after_init;
++
+ /* Get size in bytes used by the memory region */
+ static inline unsigned long get_padding(struct kaslr_memory_region *region)
+ {
+@@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void
+       BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
+       BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
++      /* Preset the end of the possible address space for physical memory */
++      physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
+       if (!kaslr_memory_enabled())
+               return;
+@@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void
+               vaddr += entropy;
+               *kaslr_regions[i].base = vaddr;
++              /* Calculate the end of the region */
++              vaddr += get_padding(&kaslr_regions[i]);
+               /*
+-               * Jump the region and add a minimum padding based on
+-               * randomization alignment.
++               * KASLR trims the maximum possible size of the
++               * direct-map. Update the physmem_end boundary.
++               * No rounding required as the region starts
++               * PUD aligned and size is in units of TB.
+                */
+-              vaddr += get_padding(&kaslr_regions[i]);
++              if (kaslr_regions[i].end)
++                      *kaslr_regions[i].end = __pa_nodebug(vaddr - 1);
++
++              /* Add a minimum padding based on randomization alignment. */
+               vaddr = round_up(vaddr + 1, PUD_SIZE);
+               remain_entropy -= entropy;
+       }
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -92,6 +92,10 @@ extern const int mmap_rnd_compat_bits_ma
+ extern int mmap_rnd_compat_bits __read_mostly;
+ #endif
++#ifndef PHYSMEM_END
++# define PHYSMEM_END  ((1ULL << MAX_PHYSMEM_BITS) - 1)
++#endif
++
+ #include <asm/page.h>
+ #include <asm/processor.h>
+--- a/kernel/resource.c
++++ b/kernel/resource.c
+@@ -1781,8 +1781,7 @@ static resource_size_t gfr_start(struct
+       if (flags & GFR_DESCENDING) {
+               resource_size_t end;
+-              end = min_t(resource_size_t, base->end,
+-                          (1ULL << MAX_PHYSMEM_BITS) - 1);
++              end = min_t(resource_size_t, base->end, PHYSMEM_END);
+               return end - size + 1;
+       }
+@@ -1799,8 +1798,7 @@ static bool gfr_continue(struct resource
+        * @size did not wrap 0.
+        */
+       return addr > addr - size &&
+-             addr <= min_t(resource_size_t, base->end,
+-                           (1ULL << MAX_PHYSMEM_BITS) - 1);
++             addr <= min_t(resource_size_t, base->end, PHYSMEM_END);
+ }
+ static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1530,7 +1530,7 @@ struct range __weak arch_get_mappable_ra
+ struct range mhp_get_pluggable_range(bool need_mapping)
+ {
+-      const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
++      const u64 max_phys = PHYSMEM_END;
+       struct range mhp_range;
+       if (need_mapping) {
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -129,7 +129,7 @@ static inline int sparse_early_nid(struc
+ static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
+                                               unsigned long *end_pfn)
+ {
+-      unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
++      unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT;
+       /*
+        * Sanity checks - do not allow an architecture to pass
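
Condensed from the hunks above, how the PHYSMEM_END selection fits
together (restating the two definitions, plus one consumer taken from
the kernel/resource.c hunk):

        /* arch/x86/include/asm/pgtable_64_types.h: KASLR-trimmed limit */
        #ifdef CONFIG_RANDOMIZE_MEMORY
        # define PHYSMEM_END    physmem_end  /* set in kernel_randomize_memory() */
        #endif

        /* include/linux/mm.h: fallback to the architectural maximum */
        #ifndef PHYSMEM_END
        # define PHYSMEM_END    ((1ULL << MAX_PHYSMEM_BITS) - 1)
        #endif

        /* consumers bound allocations by PHYSMEM_END, e.g. in gfr_start(): */
        end = min_t(resource_size_t, base->end, PHYSMEM_END);
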
diff --git a/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch b/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch
new file mode 100644
index 0000000..9dd43b8
--- /dev/null
+++ b/queue-6.1/x86-tdx-fix-data-leak-in-mmio_read.patch
@@ -0,0 +1,43 @@
+From b6fb565a2d15277896583d471b21bc14a0c99661 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Mon, 26 Aug 2024 15:53:04 +0300
+Subject: x86/tdx: Fix data leak in mmio_read()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit b6fb565a2d15277896583d471b21bc14a0c99661 upstream.
+
+The mmio_read() function makes a TDVMCALL to retrieve MMIO data for an
+address from the VMM.
+
+Sean noticed that mmio_read() unintentionally exposes the value of an
+uninitialized stack variable (val) to the VMM.
+
+This variable is only needed as an output value. It did not need to be
+passed to the VMM in the first place.
+
+Do not send the original value of *val to the VMM.
+
+[ dhansen: clarify what 'val' is used for. ]
+
+Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO")
+Reported-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240826125304.1566719-1-kirill.shutemov%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/coco/tdx/tdx.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/coco/tdx/tdx.c
++++ b/arch/x86/coco/tdx/tdx.c
+@@ -328,7 +328,6 @@ static bool mmio_read(int size, unsigned
+               .r12 = size,
+               .r13 = EPT_READ,
+               .r14 = addr,
+-              .r15 = *val,
+       };
+       if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
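
For context, the resulting argument setup, condensed. The .r10/.r11
initializers are paraphrased from upstream code this hunk does not
show and should be read as assumed context:

        struct tdx_hypercall_args args = {
                .r10 = TDX_HYPERCALL_STANDARD,                  /* assumed */
                .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),   /* assumed */
                .r12 = size,
                .r13 = EPT_READ,
                .r14 = addr,
                /* .r15 stays zero-initialized: 'val' is output-only,
                 * so no kernel stack contents reach the VMM */
        };

        if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
                return false;
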