git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
7.0-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:23:33 +0000 (14:23 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:23:33 +0000 (14:23 +0200)
added patches:
arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch
firmware-google-framebuffer-do-not-unregister-platform-device.patch
hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch
kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch
kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch
mm-damon-core-disallow-time-quota-setting-zero-esz.patch
mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch
mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch
mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
mmc-block-use-single-block-write-in-retry.patch
mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
randomize_kstack-maintain-kstack_offset-per-task.patch
rtc-ntxec-fix-of-node-reference-imbalance.patch
tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
tpm-tpm_tis-add-error-logging-for-data-transfer.patch
tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
xfs-start-gc-on-zonegc_low_space-attribute-updates.patch

59 files changed:
queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch [new file with mode: 0644]
queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch [new file with mode: 0644]
queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch [new file with mode: 0644]
queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch [new file with mode: 0644]
queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch [new file with mode: 0644]
queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch [new file with mode: 0644]
queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch [new file with mode: 0644]
queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch [new file with mode: 0644]
queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch [new file with mode: 0644]
queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch [new file with mode: 0644]
queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch [new file with mode: 0644]
queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch [new file with mode: 0644]
queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch [new file with mode: 0644]
queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch [new file with mode: 0644]
queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch [new file with mode: 0644]
queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch [new file with mode: 0644]
queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch [new file with mode: 0644]
queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch [new file with mode: 0644]
queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch [new file with mode: 0644]
queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch [new file with mode: 0644]
queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch [new file with mode: 0644]
queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch [new file with mode: 0644]
queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch [new file with mode: 0644]
queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch [new file with mode: 0644]
queue-7.0/mmc-block-use-single-block-write-in-retry.patch [new file with mode: 0644]
queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch [new file with mode: 0644]
queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch [new file with mode: 0644]
queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch [new file with mode: 0644]
queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch [new file with mode: 0644]
queue-7.0/series
queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch [new file with mode: 0644]
queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch [new file with mode: 0644]
queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch [new file with mode: 0644]
queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch [new file with mode: 0644]
queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch [new file with mode: 0644]
queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch [new file with mode: 0644]
queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch [new file with mode: 0644]
queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch [new file with mode: 0644]

diff --git a/queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch b/queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
new file mode 100644 (file)
index 0000000..409e5e1
--- /dev/null
@@ -0,0 +1,57 @@
+From d5325810814ee995debfa0b6c4a22e0391598bef Mon Sep 17 00:00:00 2001
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+Date: Fri, 20 Mar 2026 08:30:30 +0100
+Subject: arm64: dts: ti: am62-verdin: Enable pullup for eMMC data pins
+
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+
+commit d5325810814ee995debfa0b6c4a22e0391598bef upstream.
+
+Verdin AM62 board does not have external pullups on eMMC DAT1-DAT7 pins.
+Enable internal pullups on DAT1-DAT7 considering:
+
+ - without a host-side pullup, these lines rely solely on the eMMC
+   device's internal pullup (R_int, 10kohm-150kohm per JEDEC), which may
+   exceed the recommended 50kohm max for 1.8V VCCQ
+ - JEDEC JESD84-B51 Table 200 requires host-side pullups (R_DAT,
+   10kohm-100kohm) on all data lines to prevent bus floating
+
+Fixes: 316b80246b16 ("arm64: dts: ti: add verdin am62")
+Cc: stable@vger.kernel.org
+Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
+Link: https://patch.msgid.link/20260320073032.10427-1-francesco@dolcini.it
+Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi |   20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
+@@ -572,16 +572,16 @@
+       /* On-module eMMC */
+       pinctrl_sdhci0: main-mmc0-default-pins {
+               pinctrl-single,pins = <
+-                      AM62X_IOPAD(0x220, PIN_INPUT, 0) /*  (Y3) MMC0_CMD  */
+-                      AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK  */
+-                      AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */
+-                      AM62X_IOPAD(0x210, PIN_INPUT, 0) /* (AA1) MMC0_DAT1 */
+-                      AM62X_IOPAD(0x20c, PIN_INPUT, 0) /* (AA3) MMC0_DAT2 */
+-                      AM62X_IOPAD(0x208, PIN_INPUT, 0) /*  (Y4) MMC0_DAT3 */
+-                      AM62X_IOPAD(0x204, PIN_INPUT, 0) /* (AB2) MMC0_DAT4 */
+-                      AM62X_IOPAD(0x200, PIN_INPUT, 0) /* (AC1) MMC0_DAT5 */
+-                      AM62X_IOPAD(0x1fc, PIN_INPUT, 0) /* (AD2) MMC0_DAT6 */
+-                      AM62X_IOPAD(0x1f8, PIN_INPUT, 0) /* (AC2) MMC0_DAT7 */
++                      AM62X_IOPAD(0x220, PIN_INPUT,        0) /*  (Y3) MMC0_CMD  */
++                      AM62X_IOPAD(0x218, PIN_INPUT,        0) /* (AB1) MMC0_CLK  */
++                      AM62X_IOPAD(0x214, PIN_INPUT,        0) /* (AA2) MMC0_DAT0 */
++                      AM62X_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */
++                      AM62X_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */
++                      AM62X_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /*  (Y4) MMC0_DAT3 */
++                      AM62X_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */
++                      AM62X_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */
++                      AM62X_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */
++                      AM62X_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */
+               >;
+       };
diff --git a/queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch b/queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
new file mode 100644 (file)
index 0000000..bc3570e
--- /dev/null
@@ -0,0 +1,55 @@
+From 95aed2af87ec43fa7624cc81dd13d37824ad4972 Mon Sep 17 00:00:00 2001
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Date: Wed, 1 Apr 2026 10:31:11 +0100
+Subject: crypto: qat - fix IRQ cleanup on 6xxx probe failure
+
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+
+commit 95aed2af87ec43fa7624cc81dd13d37824ad4972 upstream.
+
+When adf_dev_up() partially completes and then fails, the IRQ
+handlers registered during adf_isr_resource_alloc() are not detached
+before the MSI-X vectors are released.
+
+Since the device is enabled with pcim_enable_device(), calling
+pci_alloc_irq_vectors() internally registers pcim_msi_release() as a
+devres action. On probe failure, devres runs pcim_msi_release() which
+calls pci_free_irq_vectors(), tearing down the MSI-X vectors while IRQ
+handlers (for example 'qat0-bundle0') are still attached. This causes
+remove_proc_entry() warnings:
+
+    [   22.163964] remove_proc_entry: removing non-empty directory 'irq/143', leaking at least 'qat0-bundle0'
+
+Moving the devm_add_action_or_reset() before adf_dev_up() does not solve
+the problem since devres runs in LIFO order and pcim_msi_release(),
+registered later inside adf_dev_up(), would still fire before
+adf_device_down().
+
+Fix by calling adf_dev_down() explicitly when adf_dev_up() fails, to
+properly free IRQ handlers before devres releases the MSI-X vectors.
+
+Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
+Reviewed-by: Laurent M Coquerel <laurent.m.coquerel@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/intel/qat/qat_6xxx/adf_drv.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
++++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
+@@ -182,8 +182,10 @@ static int adf_probe(struct pci_dev *pde
+               return ret;
+       ret = adf_dev_up(accel_dev, true);
+-      if (ret)
++      if (ret) {
++              adf_dev_down(accel_dev);
+               return ret;
++      }
+       ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev);
+       if (ret)
diff --git a/queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch b/queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
new file mode 100644 (file)
index 0000000..56a906b
--- /dev/null
@@ -0,0 +1,357 @@
+From 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:18 +0200
+Subject: crypto: talitos - fix SEC1 32k ahash request limitation
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 upstream.
+
+Since commit c662b043cdca ("crypto: af_alg/hash: Support
+MSG_SPLICE_PAGES"), the crypto core may pass large scatterlists spanning
+multiple pages to drivers supporting ahash operations. As a result, a
+driver can now receive large ahash requests.
+
+The SEC1 engine has a limitation where a single descriptor cannot
+process more than 32k of data. The current implementation attempts to
+handle the entire request within a single descriptor, which leads to
+failures raised by the driver:
+
+  "length exceeds h/w max limit"
+
+Address this limitation by splitting large ahash requests into multiple
+descriptors, each respecting the 32k hardware limit. This allows
+processing arbitrarily large requests.
+
+Cc: stable@vger.kernel.org
+Fixes: c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES")
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c |  216 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 147 insertions(+), 69 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -12,6 +12,7 @@
+  * All rights reserved.
+  */
++#include <linux/workqueue.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+@@ -870,10 +871,18 @@ struct talitos_ahash_req_ctx {
+       unsigned int swinit;
+       unsigned int first;
+       unsigned int last;
++      unsigned int last_request;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+       struct scatterlist bufsl[2];
+       struct scatterlist *psrc;
++
++      struct scatterlist request_bufsl[2];
++      struct ahash_request *areq;
++      struct scatterlist *request_sl;
++      unsigned int remaining_ahash_request_bytes;
++      unsigned int current_ahash_request_bytes;
++      struct work_struct sec1_ahash_process_remaining;
+ };
+ struct talitos_export_state {
+@@ -1759,7 +1768,20 @@ static void ahash_done(struct device *de
+       kfree(edesc);
+-      ahash_request_complete(areq, err);
++      if (err) {
++              ahash_request_complete(areq, err);
++              return;
++      }
++
++      req_ctx->remaining_ahash_request_bytes -=
++              req_ctx->current_ahash_request_bytes;
++
++      if (!req_ctx->remaining_ahash_request_bytes) {
++              ahash_request_complete(areq, 0);
++              return;
++      }
++
++      schedule_work(&req_ctx->sec1_ahash_process_remaining);
+ }
+ /*
+@@ -1925,60 +1947,7 @@ static struct talitos_edesc *ahash_edesc
+                                  nbytes, 0, 0, 0, areq->base.flags, false);
+ }
+-static int ahash_init(struct ahash_request *areq)
+-{
+-      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+-      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+-      struct device *dev = ctx->dev;
+-      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-      unsigned int size;
+-      dma_addr_t dma;
+-
+-      /* Initialize the context */
+-      req_ctx->buf_idx = 0;
+-      req_ctx->nbuf = 0;
+-      req_ctx->first = 1; /* first indicates h/w must init its context */
+-      req_ctx->swinit = 0; /* assume h/w init of context */
+-      size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+-                      ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+-                      : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+-      req_ctx->hw_context_size = size;
+-
+-      dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+-                           DMA_TO_DEVICE);
+-      dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
+-
+-      return 0;
+-}
+-
+-/*
+- * on h/w without explicit sha224 support, we initialize h/w context
+- * manually with sha224 constants, and tell it to run sha256.
+- */
+-static int ahash_init_sha224_swinit(struct ahash_request *areq)
+-{
+-      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-
+-      req_ctx->hw_context[0] = SHA224_H0;
+-      req_ctx->hw_context[1] = SHA224_H1;
+-      req_ctx->hw_context[2] = SHA224_H2;
+-      req_ctx->hw_context[3] = SHA224_H3;
+-      req_ctx->hw_context[4] = SHA224_H4;
+-      req_ctx->hw_context[5] = SHA224_H5;
+-      req_ctx->hw_context[6] = SHA224_H6;
+-      req_ctx->hw_context[7] = SHA224_H7;
+-
+-      /* init 64-bit count */
+-      req_ctx->hw_context[8] = 0;
+-      req_ctx->hw_context[9] = 0;
+-
+-      ahash_init(areq);
+-      req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
+-
+-      return 0;
+-}
+-
+-static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes)
+ {
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+@@ -1997,12 +1966,12 @@ static int ahash_process_req(struct ahas
+       if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
+               /* Buffer up to one whole block */
+-              nents = sg_nents_for_len(areq->src, nbytes);
++              nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_copy_to_buffer(areq->src, nents,
++              sg_copy_to_buffer(req_ctx->request_sl, nents,
+                                 ctx_buf + req_ctx->nbuf, nbytes);
+               req_ctx->nbuf += nbytes;
+               return 0;
+@@ -2029,7 +1998,7 @@ static int ahash_process_req(struct ahas
+               sg_init_table(req_ctx->bufsl, nsg);
+               sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf);
+               if (nsg > 1)
+-                      sg_chain(req_ctx->bufsl, 2, areq->src);
++                      sg_chain(req_ctx->bufsl, 2, req_ctx->request_sl);
+               req_ctx->psrc = req_ctx->bufsl;
+       } else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
+               int offset;
+@@ -2038,26 +2007,26 @@ static int ahash_process_req(struct ahas
+                       offset = blocksize - req_ctx->nbuf;
+               else
+                       offset = nbytes_to_hash - req_ctx->nbuf;
+-              nents = sg_nents_for_len(areq->src, offset);
++              nents = sg_nents_for_len(req_ctx->request_sl, offset);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_copy_to_buffer(areq->src, nents,
++              sg_copy_to_buffer(req_ctx->request_sl, nents,
+                                 ctx_buf + req_ctx->nbuf, offset);
+               req_ctx->nbuf += offset;
+-              req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src,
++              req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, req_ctx->request_sl,
+                                                offset);
+       } else
+-              req_ctx->psrc = areq->src;
++              req_ctx->psrc = req_ctx->request_sl;
+       if (to_hash_later) {
+-              nents = sg_nents_for_len(areq->src, nbytes);
++              nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_pcopy_to_buffer(areq->src, nents,
++              sg_pcopy_to_buffer(req_ctx->request_sl, nents,
+                                  req_ctx->buf[(req_ctx->buf_idx + 1) & 1],
+                                     to_hash_later,
+                                     nbytes - to_hash_later);
+@@ -2065,7 +2034,7 @@ static int ahash_process_req(struct ahas
+       req_ctx->to_hash_later = to_hash_later;
+       /* Allocate extended descriptor */
+-      edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
++      edesc = ahash_edesc_alloc(req_ctx->areq, nbytes_to_hash);
+       if (IS_ERR(edesc))
+               return PTR_ERR(edesc);
+@@ -2087,14 +2056,123 @@ static int ahash_process_req(struct ahas
+       if (ctx->keylen && (req_ctx->first || req_ctx->last))
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+-      return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
++      return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+ }
+-static int ahash_update(struct ahash_request *areq)
++static void sec1_ahash_process_remaining(struct work_struct *work)
+ {
++      struct talitos_ahash_req_ctx *req_ctx =
++              container_of(work, struct talitos_ahash_req_ctx,
++                           sec1_ahash_process_remaining);
++      int err = 0;
++
++      req_ctx->request_sl = scatterwalk_ffwd(req_ctx->request_bufsl,
++                                             req_ctx->request_sl, TALITOS1_MAX_DATA_LEN);
++
++      if (req_ctx->remaining_ahash_request_bytes > TALITOS1_MAX_DATA_LEN)
++              req_ctx->current_ahash_request_bytes = TALITOS1_MAX_DATA_LEN;
++      else {
++              req_ctx->current_ahash_request_bytes =
++                      req_ctx->remaining_ahash_request_bytes;
++
++              if (req_ctx->last_request)
++                      req_ctx->last = 1;
++      }
++
++      err = ahash_process_req_one(req_ctx->areq,
++                                  req_ctx->current_ahash_request_bytes);
++
++      if (err != -EINPROGRESS)
++              ahash_request_complete(req_ctx->areq, err);
++}
++
++static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++{
++      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++      struct device *dev = ctx->dev;
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++      struct talitos_private *priv = dev_get_drvdata(dev);
++      bool is_sec1 = has_ftr_sec1(priv);
++
++      req_ctx->areq = areq;
++      req_ctx->request_sl = areq->src;
++      req_ctx->remaining_ahash_request_bytes = nbytes;
++
++      if (is_sec1) {
++              if (nbytes > TALITOS1_MAX_DATA_LEN)
++                      nbytes = TALITOS1_MAX_DATA_LEN;
++              else if (req_ctx->last_request)
++                      req_ctx->last = 1;
++      }
++
++      req_ctx->current_ahash_request_bytes = nbytes;
++
++      return ahash_process_req_one(req_ctx->areq,
++                                   req_ctx->current_ahash_request_bytes);
++}
++
++static int ahash_init(struct ahash_request *areq)
++{
++      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++      struct device *dev = ctx->dev;
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++      unsigned int size;
++      dma_addr_t dma;
++      /* Initialize the context */
++      req_ctx->buf_idx = 0;
++      req_ctx->nbuf = 0;
++      req_ctx->first = 1; /* first indicates h/w must init its context */
++      req_ctx->swinit = 0; /* assume h/w init of context */
++      size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
++                      ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
++                      : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
++      req_ctx->hw_context_size = size;
++      req_ctx->last_request = 0;
+       req_ctx->last = 0;
++      INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
++
++      dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
++                           DMA_TO_DEVICE);
++      dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
++
++      return 0;
++}
++
++/*
++ * on h/w without explicit sha224 support, we initialize h/w context
++ * manually with sha224 constants, and tell it to run sha256.
++ */
++static int ahash_init_sha224_swinit(struct ahash_request *areq)
++{
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++      req_ctx->hw_context[0] = SHA224_H0;
++      req_ctx->hw_context[1] = SHA224_H1;
++      req_ctx->hw_context[2] = SHA224_H2;
++      req_ctx->hw_context[3] = SHA224_H3;
++      req_ctx->hw_context[4] = SHA224_H4;
++      req_ctx->hw_context[5] = SHA224_H5;
++      req_ctx->hw_context[6] = SHA224_H6;
++      req_ctx->hw_context[7] = SHA224_H7;
++
++      /* init 64-bit count */
++      req_ctx->hw_context[8] = 0;
++      req_ctx->hw_context[9] = 0;
++
++      ahash_init(areq);
++      req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
++
++      return 0;
++}
++
++static int ahash_update(struct ahash_request *areq)
++{
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++      req_ctx->last_request = 0;
+       return ahash_process_req(areq, areq->nbytes);
+ }
+@@ -2103,7 +2181,7 @@ static int ahash_final(struct ahash_requ
+ {
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-      req_ctx->last = 1;
++      req_ctx->last_request = 1;
+       return ahash_process_req(areq, 0);
+ }
+@@ -2112,7 +2190,7 @@ static int ahash_finup(struct ahash_requ
+ {
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-      req_ctx->last = 1;
++      req_ctx->last_request = 1;
+       return ahash_process_req(areq, areq->nbytes);
+ }
diff --git a/queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch b/queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
new file mode 100644 (file)
index 0000000..7d26de3
--- /dev/null
@@ -0,0 +1,199 @@
+From a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:19 +0200
+Subject: crypto: talitos - rename first/last to first_desc/last_desc
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae upstream.
+
+Previous commit introduces a new last_request variable in the context
+structure.
+
+Renaming the first/last existing member variables in the context
+structure to improve readability.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c |   46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -869,8 +869,8 @@ struct talitos_ahash_req_ctx {
+       u8 buf[2][HASH_MAX_BLOCK_SIZE];
+       int buf_idx;
+       unsigned int swinit;
+-      unsigned int first;
+-      unsigned int last;
++      unsigned int first_desc;
++      unsigned int last_desc;
+       unsigned int last_request;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+@@ -889,8 +889,8 @@ struct talitos_export_state {
+       u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
+       u8 buf[HASH_MAX_BLOCK_SIZE];
+       unsigned int swinit;
+-      unsigned int first;
+-      unsigned int last;
++      unsigned int first_desc;
++      unsigned int last_desc;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+ };
+@@ -1722,7 +1722,7 @@ static void common_nonsnoop_hash_unmap(s
+       if (desc->next_desc &&
+           desc->ptr[5].ptr != desc2->ptr[5].ptr)
+               unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               memcpy(areq->result, req_ctx->hw_context,
+                      crypto_ahash_digestsize(tfm));
+@@ -1759,7 +1759,7 @@ static void ahash_done(struct device *de
+                container_of(desc, struct talitos_edesc, desc);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-      if (!req_ctx->last && req_ctx->to_hash_later) {
++      if (!req_ctx->last_desc && req_ctx->to_hash_later) {
+               /* Position any partial block for next update/final/finup */
+               req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1;
+               req_ctx->nbuf = req_ctx->to_hash_later;
+@@ -1825,7 +1825,7 @@ static int common_nonsnoop_hash(struct t
+       /* first DWORD empty */
+       /* hash context in */
+-      if (!req_ctx->first || req_ctx->swinit) {
++      if (!req_ctx->first_desc || req_ctx->swinit) {
+               map_single_talitos_ptr_nosync(dev, &desc->ptr[1],
+                                             req_ctx->hw_context_size,
+                                             req_ctx->hw_context,
+@@ -1833,7 +1833,7 @@ static int common_nonsnoop_hash(struct t
+               req_ctx->swinit = 0;
+       }
+       /* Indicate next op is not the first. */
+-      req_ctx->first = 0;
++      req_ctx->first_desc = 0;
+       /* HMAC key */
+       if (ctx->keylen)
+@@ -1866,7 +1866,7 @@ static int common_nonsnoop_hash(struct t
+       /* fifth DWORD empty */
+       /* hash/HMAC out -or- hash context out */
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               map_single_talitos_ptr(dev, &desc->ptr[5],
+                                      crypto_ahash_digestsize(tfm),
+                                      req_ctx->hw_context, DMA_FROM_DEVICE);
+@@ -1908,7 +1908,7 @@ static int common_nonsnoop_hash(struct t
+               if (sg_count > 1)
+                       sync_needed = true;
+               copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
+-              if (req_ctx->last)
++              if (req_ctx->last_desc)
+                       map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
+                                                     req_ctx->hw_context_size,
+                                                     req_ctx->hw_context,
+@@ -1964,7 +1964,7 @@ static int ahash_process_req_one(struct
+       bool is_sec1 = has_ftr_sec1(priv);
+       u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
+-      if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
++      if (!req_ctx->last_desc && (nbytes + req_ctx->nbuf <= blocksize)) {
+               /* Buffer up to one whole block */
+               nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+@@ -1981,7 +1981,7 @@ static int ahash_process_req_one(struct
+       nbytes_to_hash = nbytes + req_ctx->nbuf;
+       to_hash_later = nbytes_to_hash & (blocksize - 1);
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               to_hash_later = 0;
+       else if (to_hash_later)
+               /* There is a partial block. Hash the full block(s) now */
+@@ -2041,19 +2041,19 @@ static int ahash_process_req_one(struct
+       edesc->desc.hdr = ctx->desc_hdr_template;
+       /* On last one, request SEC to pad; otherwise continue */
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD;
+       else
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT;
+       /* request SEC to INIT hash. */
+-      if (req_ctx->first && !req_ctx->swinit)
++      if (req_ctx->first_desc && !req_ctx->swinit)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
+       /* When the tfm context has a keylen, it's an HMAC.
+        * A first or last (ie. not middle) descriptor must request HMAC.
+        */
+-      if (ctx->keylen && (req_ctx->first || req_ctx->last))
++      if (ctx->keylen && (req_ctx->first_desc || req_ctx->last_desc))
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+       return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+@@ -2076,7 +2076,7 @@ static void sec1_ahash_process_remaining
+                       req_ctx->remaining_ahash_request_bytes;
+               if (req_ctx->last_request)
+-                      req_ctx->last = 1;
++                      req_ctx->last_desc = 1;
+       }
+       err = ahash_process_req_one(req_ctx->areq,
+@@ -2103,7 +2103,7 @@ static int ahash_process_req(struct ahas
+               if (nbytes > TALITOS1_MAX_DATA_LEN)
+                       nbytes = TALITOS1_MAX_DATA_LEN;
+               else if (req_ctx->last_request)
+-                      req_ctx->last = 1;
++                      req_ctx->last_desc = 1;
+       }
+       req_ctx->current_ahash_request_bytes = nbytes;
+@@ -2124,14 +2124,14 @@ static int ahash_init(struct ahash_reque
+       /* Initialize the context */
+       req_ctx->buf_idx = 0;
+       req_ctx->nbuf = 0;
+-      req_ctx->first = 1; /* first indicates h/w must init its context */
++      req_ctx->first_desc = 1; /* first_desc indicates h/w must init its context */
+       req_ctx->swinit = 0; /* assume h/w init of context */
+       size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+                       ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+                       : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+       req_ctx->hw_context_size = size;
+       req_ctx->last_request = 0;
+-      req_ctx->last = 0;
++      req_ctx->last_desc = 0;
+       INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
+       dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+@@ -2224,8 +2224,8 @@ static int ahash_export(struct ahash_req
+              req_ctx->hw_context_size);
+       memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf);
+       export->swinit = req_ctx->swinit;
+-      export->first = req_ctx->first;
+-      export->last = req_ctx->last;
++      export->first_desc = req_ctx->first_desc;
++      export->last_desc = req_ctx->last_desc;
+       export->to_hash_later = req_ctx->to_hash_later;
+       export->nbuf = req_ctx->nbuf;
+@@ -2250,8 +2250,8 @@ static int ahash_import(struct ahash_req
+       memcpy(req_ctx->hw_context, export->hw_context, size);
+       memcpy(req_ctx->buf[0], export->buf, export->nbuf);
+       req_ctx->swinit = export->swinit;
+-      req_ctx->first = export->first;
+-      req_ctx->last = export->last;
++      req_ctx->first_desc = export->first_desc;
++      req_ctx->last_desc = export->last_desc;
+       req_ctx->to_hash_later = export->to_hash_later;
+       req_ctx->nbuf = export->nbuf;
diff --git a/queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch b/queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch
new file mode 100644 (file)
index 0000000..81e9718
--- /dev/null
@@ -0,0 +1,345 @@
+From a2be37eedb52ea26938fa4cc9de1ff84963c57ad Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+Date: Tue, 24 Feb 2026 11:42:04 +0100
+Subject: firmware: exynos-acpm: Drop fake 'const' on handle pointer
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+
+commit a2be37eedb52ea26938fa4cc9de1ff84963c57ad upstream.
+
+All the functions operating on the 'handle' pointer are claiming it is a
+pointer to const thus they should not modify the handle.  In fact that's
+a false statement, because first thing these functions do is drop the
+cast to const with container_of:
+
+  struct acpm_info *acpm = handle_to_acpm_info(handle);
+
+And with such cast the handle is easily writable with simple:
+
+  acpm->handle.ops.pmic_ops.read_reg = NULL;
+
+The code is not correct logically, either, because functions like
+acpm_get_by_node() and acpm_handle_put() are meant to modify the handle
+reference counting, thus they must modify the handle.  Modification here
+happens anyway, even if the reference counting is stored in the
+container which the handle is part of.
+
+The code does not have actual visible bug, but incorrect 'const'
+annotations could lead to incorrect compiler decisions.
+
+Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+Link: https://patch.msgid.link/20260224104203.42950-2-krzysztof.kozlowski@oss.qualcomm.com
+Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/clk/samsung/clk-acpm.c                        |    4 -
+ drivers/firmware/samsung/exynos-acpm-dvfs.c           |    4 -
+ drivers/firmware/samsung/exynos-acpm-dvfs.h           |    4 -
+ drivers/firmware/samsung/exynos-acpm-pmic.c           |   10 ++--
+ drivers/firmware/samsung/exynos-acpm-pmic.h           |   10 ++--
+ drivers/firmware/samsung/exynos-acpm.c                |   16 ++++---
+ drivers/firmware/samsung/exynos-acpm.h                |    2 
+ drivers/mfd/sec-acpm.c                                |   10 ++--
+ include/linux/firmware/samsung/exynos-acpm-protocol.h |   40 +++++++-----------
+ 9 files changed, 48 insertions(+), 52 deletions(-)
+
+--- a/drivers/clk/samsung/clk-acpm.c
++++ b/drivers/clk/samsung/clk-acpm.c
+@@ -20,7 +20,7 @@ struct acpm_clk {
+       u32 id;
+       struct clk_hw hw;
+       unsigned int mbox_chan_id;
+-      const struct acpm_handle *handle;
++      struct acpm_handle *handle;
+ };
+ struct acpm_clk_variant {
+@@ -113,7 +113,7 @@ static int acpm_clk_register(struct devi
+ static int acpm_clk_probe(struct platform_device *pdev)
+ {
+-      const struct acpm_handle *acpm_handle;
++      struct acpm_handle *acpm_handle;
+       struct clk_hw_onecell_data *clk_data;
+       struct clk_hw **hws;
+       struct device *dev = &pdev->dev;
+--- a/drivers/firmware/samsung/exynos-acpm-dvfs.c
++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c
+@@ -42,7 +42,7 @@ static void acpm_dvfs_init_set_rate_cmd(
+       cmd[3] = ktime_to_ms(ktime_get());
+ }
+-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
++int acpm_dvfs_set_rate(struct acpm_handle *handle,
+                      unsigned int acpm_chan_id, unsigned int clk_id,
+                      unsigned long rate)
+ {
+@@ -62,7 +62,7 @@ static void acpm_dvfs_init_get_rate_cmd(
+       cmd[3] = ktime_to_ms(ktime_get());
+ }
+-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
+                                unsigned int acpm_chan_id, unsigned int clk_id)
+ {
+       struct acpm_xfer xfer;
+--- a/drivers/firmware/samsung/exynos-acpm-dvfs.h
++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.h
+@@ -11,10 +11,10 @@
+ struct acpm_handle;
+-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
++int acpm_dvfs_set_rate(struct acpm_handle *handle,
+                      unsigned int acpm_chan_id, unsigned int id,
+                      unsigned long rate);
+-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
+                                unsigned int acpm_chan_id,
+                                unsigned int clk_id);
+--- a/drivers/firmware/samsung/exynos-acpm-pmic.c
++++ b/drivers/firmware/samsung/exynos-acpm-pmic.c
+@@ -77,7 +77,7 @@ static void acpm_pmic_init_read_cmd(u32
+       cmd[3] = ktime_to_ms(ktime_get());
+ }
+-int acpm_pmic_read_reg(const struct acpm_handle *handle,
++int acpm_pmic_read_reg(struct acpm_handle *handle,
+                      unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                      u8 *buf)
+ {
+@@ -107,7 +107,7 @@ static void acpm_pmic_init_bulk_read_cmd
+                FIELD_PREP(ACPM_PMIC_VALUE, count);
+ }
+-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
++int acpm_pmic_bulk_read(struct acpm_handle *handle,
+                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                       u8 count, u8 *buf)
+ {
+@@ -150,7 +150,7 @@ static void acpm_pmic_init_write_cmd(u32
+       cmd[3] = ktime_to_ms(ktime_get());
+ }
+-int acpm_pmic_write_reg(const struct acpm_handle *handle,
++int acpm_pmic_write_reg(struct acpm_handle *handle,
+                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                       u8 value)
+ {
+@@ -187,7 +187,7 @@ static void acpm_pmic_init_bulk_write_cm
+       }
+ }
+-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
++int acpm_pmic_bulk_write(struct acpm_handle *handle,
+                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                        u8 count, const u8 *buf)
+ {
+@@ -220,7 +220,7 @@ static void acpm_pmic_init_update_cmd(u3
+       cmd[3] = ktime_to_ms(ktime_get());
+ }
+-int acpm_pmic_update_reg(const struct acpm_handle *handle,
++int acpm_pmic_update_reg(struct acpm_handle *handle,
+                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                        u8 value, u8 mask)
+ {
+--- a/drivers/firmware/samsung/exynos-acpm-pmic.h
++++ b/drivers/firmware/samsung/exynos-acpm-pmic.h
+@@ -11,19 +11,19 @@
+ struct acpm_handle;
+-int acpm_pmic_read_reg(const struct acpm_handle *handle,
++int acpm_pmic_read_reg(struct acpm_handle *handle,
+                      unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                      u8 *buf);
+-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
++int acpm_pmic_bulk_read(struct acpm_handle *handle,
+                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                       u8 count, u8 *buf);
+-int acpm_pmic_write_reg(const struct acpm_handle *handle,
++int acpm_pmic_write_reg(struct acpm_handle *handle,
+                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                       u8 value);
+-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
++int acpm_pmic_bulk_write(struct acpm_handle *handle,
+                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                        u8 count, const u8 *buf);
+-int acpm_pmic_update_reg(const struct acpm_handle *handle,
++int acpm_pmic_update_reg(struct acpm_handle *handle,
+                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+                        u8 value, u8 mask);
+ #endif /* __EXYNOS_ACPM_PMIC_H__ */
+--- a/drivers/firmware/samsung/exynos-acpm.c
++++ b/drivers/firmware/samsung/exynos-acpm.c
+@@ -412,7 +412,7 @@ static int acpm_wait_for_message_respons
+  *
+  * Return: 0 on success, -errno otherwise.
+  */
+-int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer)
++int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer)
+ {
+       struct acpm_info *acpm = handle_to_acpm_info(handle);
+       struct exynos_mbox_msg msg;
+@@ -674,7 +674,7 @@ static int acpm_probe(struct platform_de
+  * acpm_handle_put() - release the handle acquired by acpm_get_by_phandle.
+  * @handle:   Handle acquired by acpm_get_by_phandle.
+  */
+-static void acpm_handle_put(const struct acpm_handle *handle)
++static void acpm_handle_put(struct acpm_handle *handle)
+ {
+       struct acpm_info *acpm = handle_to_acpm_info(handle);
+       struct device *dev = acpm->dev;
+@@ -700,9 +700,11 @@ static void devm_acpm_release(struct dev
+  * @np:               ACPM device tree node.
+  *
+  * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
++ *
++ * Note: handle CANNOT be pointer to const
+  */
+-static const struct acpm_handle *acpm_get_by_node(struct device *dev,
+-                                                struct device_node *np)
++static struct acpm_handle *acpm_get_by_node(struct device *dev,
++                                          struct device_node *np)
+ {
+       struct platform_device *pdev;
+       struct device_link *link;
+@@ -743,10 +745,10 @@ static const struct acpm_handle *acpm_ge
+  *
+  * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
+  */
+-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+-                                              struct device_node *np)
++struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++                                        struct device_node *np)
+ {
+-      const struct acpm_handle **ptr, *handle;
++      struct acpm_handle **ptr, *handle;
+       ptr = devres_alloc(devm_acpm_release, sizeof(*ptr), GFP_KERNEL);
+       if (!ptr)
+--- a/drivers/firmware/samsung/exynos-acpm.h
++++ b/drivers/firmware/samsung/exynos-acpm.h
+@@ -17,7 +17,7 @@ struct acpm_xfer {
+ struct acpm_handle;
+-int acpm_do_xfer(const struct acpm_handle *handle,
++int acpm_do_xfer(struct acpm_handle *handle,
+                const struct acpm_xfer *xfer);
+ #endif /* __EXYNOS_ACPM_H__ */
+--- a/drivers/mfd/sec-acpm.c
++++ b/drivers/mfd/sec-acpm.c
+@@ -367,7 +367,7 @@ static const struct regmap_config s2mpg1
+ };
+ struct sec_pmic_acpm_shared_bus_context {
+-      const struct acpm_handle *acpm;
++      struct acpm_handle *acpm;
+       unsigned int acpm_chan_id;
+       u8 speedy_channel;
+ };
+@@ -390,7 +390,7 @@ static int sec_pmic_acpm_bus_write(void
+                                  size_t count)
+ {
+       struct sec_pmic_acpm_bus_context *ctx = context;
+-      const struct acpm_handle *acpm = ctx->shared->acpm;
++      struct acpm_handle *acpm = ctx->shared->acpm;
+       const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+       size_t val_count = count - BITS_TO_BYTES(ACPM_ADDR_BITS);
+       const u8 *d = data;
+@@ -410,7 +410,7 @@ static int sec_pmic_acpm_bus_read(void *
+                                 void *val_buf, size_t val_size)
+ {
+       struct sec_pmic_acpm_bus_context *ctx = context;
+-      const struct acpm_handle *acpm = ctx->shared->acpm;
++      struct acpm_handle *acpm = ctx->shared->acpm;
+       const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+       const u8 *r = reg_buf;
+       u8 reg;
+@@ -429,7 +429,7 @@ static int sec_pmic_acpm_bus_reg_update_
+                                            unsigned int val)
+ {
+       struct sec_pmic_acpm_bus_context *ctx = context;
+-      const struct acpm_handle *acpm = ctx->shared->acpm;
++      struct acpm_handle *acpm = ctx->shared->acpm;
+       const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+       return pmic_ops->update_reg(acpm, ctx->shared->acpm_chan_id, ctx->type, reg & 0xff,
+@@ -480,7 +480,7 @@ static int sec_pmic_acpm_probe(struct pl
+       struct regmap *regmap_common, *regmap_pmic, *regmap;
+       const struct sec_pmic_acpm_platform_data *pdata;
+       struct sec_pmic_acpm_shared_bus_context *shared_ctx;
+-      const struct acpm_handle *acpm;
++      struct acpm_handle *acpm;
+       struct device *dev = &pdev->dev;
+       int ret, irq;
+--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h
++++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h
+@@ -14,30 +14,24 @@ struct acpm_handle;
+ struct device_node;
+ struct acpm_dvfs_ops {
+-      int (*set_rate)(const struct acpm_handle *handle,
+-                      unsigned int acpm_chan_id, unsigned int clk_id,
+-                      unsigned long rate);
+-      unsigned long (*get_rate)(const struct acpm_handle *handle,
++      int (*set_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                      unsigned int clk_id, unsigned long rate);
++      unsigned long (*get_rate)(struct acpm_handle *handle,
+                                 unsigned int acpm_chan_id,
+                                 unsigned int clk_id);
+ };
+ struct acpm_pmic_ops {
+-      int (*read_reg)(const struct acpm_handle *handle,
+-                      unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+-                      u8 *buf);
+-      int (*bulk_read)(const struct acpm_handle *handle,
+-                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+-                       u8 count, u8 *buf);
+-      int (*write_reg)(const struct acpm_handle *handle,
+-                       unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+-                       u8 value);
+-      int (*bulk_write)(const struct acpm_handle *handle,
+-                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+-                        u8 count, const u8 *buf);
+-      int (*update_reg)(const struct acpm_handle *handle,
+-                        unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+-                        u8 value, u8 mask);
++      int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                      u8 type, u8 reg, u8 chan, u8 *buf);
++      int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                       u8 type, u8 reg, u8 chan, u8 count, u8 *buf);
++      int (*write_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                       u8 type, u8 reg, u8 chan, u8 value);
++      int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                        u8 type, u8 reg, u8 chan, u8 count, const u8 *buf);
++      int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++                        u8 type, u8 reg, u8 chan, u8 value, u8 mask);
+ };
+ struct acpm_ops {
+@@ -56,12 +50,12 @@ struct acpm_handle {
+ struct device;
+ #if IS_ENABLED(CONFIG_EXYNOS_ACPM_PROTOCOL)
+-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+-                                              struct device_node *np);
++struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++                                        struct device_node *np);
+ #else
+-static inline const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+-                                                            struct device_node *np)
++static inline struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++                                                      struct device_node *np)
+ {
+       return NULL;
+ }
diff --git a/queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch b/queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch
new file mode 100644 (file)
index 0000000..bb50115
--- /dev/null
@@ -0,0 +1,69 @@
+From 5cd28bd28c8ce426b56ce4230dbd17537181d5ad Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann <tzimmermann@suse.de>
+Date: Tue, 17 Feb 2026 16:56:11 +0100
+Subject: firmware: google: framebuffer: Do not unregister platform device
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+commit 5cd28bd28c8ce426b56ce4230dbd17537181d5ad upstream.
+
+The native driver takes over the framebuffer aperture by removing the
+system- framebuffer platform device. Afterwards the pointer in drvdata
+is dangling. Remove the entire logic around drvdata and let the kernel's
+aperture helpers handle this. The platform device depends on the native
+hardware device instead of the coreboot device anyway.
+
+When commit 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer
+driver") added the coreboot framebuffer code, the kernel did not support
+device-based aperture management. Instead native drivers only removed
+the conflicting fbdev device. At that point, unregistering the framebuffer
+device most likely worked correctly. It was definitely broken after
+commit d9702b2a2171 ("fbdev/simplefb: Do not use struct
+fb_info.apertures"). So take this commit for the Fixes tag. Earlier
+releases might work depending on the native hardware driver.
+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Fixes: d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures")
+Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
+Acked-by: Julius Werner <jwerner@chromium.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Hans de Goede <hansg@kernel.org>
+Cc: linux-fbdev@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v6.3+
+Link: https://patch.msgid.link/20260217155836.96267-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/google/framebuffer-coreboot.c |   10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/drivers/firmware/google/framebuffer-coreboot.c
++++ b/drivers/firmware/google/framebuffer-coreboot.c
+@@ -81,19 +81,10 @@ static int framebuffer_probe(struct core
+                                                sizeof(pdata));
+       if (IS_ERR(pdev))
+               pr_warn("coreboot: could not register framebuffer\n");
+-      else
+-              dev_set_drvdata(&dev->dev, pdev);
+       return PTR_ERR_OR_ZERO(pdev);
+ }
+-static void framebuffer_remove(struct coreboot_device *dev)
+-{
+-      struct platform_device *pdev = dev_get_drvdata(&dev->dev);
+-
+-      platform_device_unregister(pdev);
+-}
+-
+ static const struct coreboot_device_id framebuffer_ids[] = {
+       { .tag = CB_TAG_FRAMEBUFFER },
+       { /* sentinel */ }
+@@ -102,7 +93,6 @@ MODULE_DEVICE_TABLE(coreboot, framebuffe
+ static struct coreboot_driver framebuffer_driver = {
+       .probe = framebuffer_probe,
+-      .remove = framebuffer_remove,
+       .drv = {
+               .name = "framebuffer",
+       },
diff --git a/queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch b/queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
new file mode 100644 (file)
index 0000000..bf439e9
--- /dev/null
@@ -0,0 +1,58 @@
+From 24c73e93d6a756e1b8626bb259d2e07c5b89b370 Mon Sep 17 00:00:00 2001
+From: Sanman Pradhan <psanman@juniper.net>
+Date: Fri, 10 Apr 2026 00:25:55 +0000
+Subject: hwmon: (pt5161l) Fix bugs in pt5161l_read_block_data()
+
+From: Sanman Pradhan <psanman@juniper.net>
+
+commit 24c73e93d6a756e1b8626bb259d2e07c5b89b370 upstream.
+
+Fix two bugs in pt5161l_read_block_data():
+
+1. Buffer overrun: The local buffer rbuf is declared as u8 rbuf[24],
+   but i2c_smbus_read_block_data() can return up to
+   I2C_SMBUS_BLOCK_MAX (32) bytes. The i2c-core copies the data into
+   the caller's buffer before the return value can be checked, so
+   the post-read length validation does not prevent a stack overrun
+   if a device returns more than 24 bytes. Resize the buffer to
+   I2C_SMBUS_BLOCK_MAX.
+
+2. Unexpected positive return on length mismatch: When all three
+   retries are exhausted because the device returns data with an
+   unexpected length, i2c_smbus_read_block_data() returns a positive
+   byte count. The function returns this directly, and callers treat
+   any non-negative return as success, processing stale or incomplete
+   buffer contents. Return -EIO when retries are exhausted with a
+   positive return value, preserving the negative error code on I2C
+   failure.
+
+Fixes: 1b2ca93cd0592 ("hwmon: Add driver for Astera Labs PT5161L retimer")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sanman Pradhan <psanman@juniper.net>
+Link: https://lore.kernel.org/r/20260410002549.424162-1-sanman.pradhan@hpe.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwmon/pt5161l.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/hwmon/pt5161l.c
++++ b/drivers/hwmon/pt5161l.c
+@@ -121,7 +121,7 @@ static int pt5161l_read_block_data(struc
+       int ret, tries;
+       u8 remain_len = len;
+       u8 curr_len;
+-      u8 wbuf[16], rbuf[24];
++      u8 wbuf[16], rbuf[I2C_SMBUS_BLOCK_MAX];
+       u8 cmd = 0x08; /* [7]:pec_en, [4:2]:func, [1]:start, [0]:end */
+       u8 config = 0x00; /* [6]:cfg_type, [4:1]:burst_len, [0]:address bit16 */
+@@ -151,7 +151,7 @@ static int pt5161l_read_block_data(struc
+                               break;
+               }
+               if (tries >= 3)
+-                      return ret;
++                      return ret < 0 ? ret : -EIO;
+               memcpy(val, rbuf, curr_len);
+               val += curr_len;
diff --git a/queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch b/queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch
new file mode 100644 (file)
index 0000000..16e62d9
--- /dev/null
@@ -0,0 +1,39 @@
+From d70d4323dd9636e35696639f6b4c2b2735291516 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Wed, 1 Apr 2026 11:36:00 +0100
+Subject: KVM: arm64: Account for RESx bits in __compute_fgt()
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit d70d4323dd9636e35696639f6b4c2b2735291516 upstream.
+
+When computing Fine Grained Traps, it is preferable to account for
+the reserved bits. The HW will most probably ignore them, unless the
+bits have been repurposed to do something else.
+
+Use caution, and fold our view of the reserved bits in.
+
+Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
+Fixes: c259d763e6b09 ("KVM: arm64: Account for RES1 bits in DECLARE_FEAT_MAP() and co")
+Link: https://sashiko.dev/#/patchset/20260319154937.3619520-1-sascha.bischoff%40arm.com
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260401103611.357092-6-maz@kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/config.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/config.c
++++ b/arch/arm64/kvm/config.c
+@@ -1585,8 +1585,8 @@ static __always_inline void __compute_fg
+               clear |= ~nested & m->nmask;
+       }
+-      val |= set;
+-      val &= ~clear;
++      val |= set | m->res1;
++      val &= ~(clear | m->res0);
+       *vcpu_fgt(vcpu, reg) = val;
+ }
diff --git a/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
new file mode 100644 (file)
index 0000000..96d5a29
--- /dev/null
@@ -0,0 +1,69 @@
+From 96bd3e76a171a8e21a6387e54e4c420a81968492 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:10 +0000
+Subject: KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream.
+
+According to the APM Volume #2, 15.5, Canonicalization and Consistency
+Checks (24593—Rev. 3.42—March 2024), the following condition (among
+others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR):
+
+  EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
+
+In the list of consistency checks done when EFER.LME and CR0.PG are set,
+add a check that CS.L and CS.D are not both set, after the existing
+check that CR4.PAE is set.
+
+This is functionally a nop because the nested VMRUN results in
+SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM makes all
+consistency checks before a VMRUN is actually attempted.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ arch/x86/kvm/svm/svm.h    |    1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -392,6 +392,10 @@ static bool __nested_vmcb_check_save(str
+                   CC(!(save->cr0 & X86_CR0_PE)) ||
+                   CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
+                       return false;
++
++              if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
++                     (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
++                      return false;
+       }
+       /* Note, SVM doesn't have any additional restrictions on CR4. */
+@@ -508,6 +512,8 @@ static void __nested_copy_vmcb_save_to_c
+        * Copy only fields that are validated, as we need them
+        * to avoid TOC/TOU races.
+        */
++      to->cs = from->cs;
++
+       to->efer = from->efer;
+       to->cr0 = from->cr0;
+       to->cr3 = from->cr3;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -140,6 +140,7 @@ struct kvm_vmcb_info {
+ };
+ struct vmcb_save_area_cached {
++      struct vmcb_seg cs;
+       u64 efer;
+       u64 cr4;
+       u64 cr3;
diff --git a/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
new file mode 100644 (file)
index 0000000..47c0008
--- /dev/null
@@ -0,0 +1,49 @@
+From b71138fcc362c67ebe66747bb22cb4e6b4d6a651 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:09 +0000
+Subject: KVM: nSVM: Add missing consistency check for nCR3 validity
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream.
+
+From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
+
+  When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
+  following conditions are considered illegal state combinations, in
+  addition to those mentioned in “Canonicalization and Consistency Checks”:
+      • Any MBZ bit of nCR3 is set.
+      • Any G_PAT.PA field has an unsupported type encoding or any
+        reserved field in G_PAT has a nonzero value.
+
+Add the consistency check for nCR3 being a legal GPA with no MBZ bits
+set.  Note, the G_PAT.PA check is being handled separately[*].
+
+Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*]
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org
+[sean: capture everything in CC(), massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -350,6 +350,10 @@ static bool __nested_vmcb_check_controls
+       if (CC(control->asid == 0))
+               return false;
++      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
++             !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
++              return false;
++
+       if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+                                          MSRPM_SIZE)))
+               return false;
diff --git a/queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch b/queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
new file mode 100644 (file)
index 0000000..45f5964
--- /dev/null
@@ -0,0 +1,41 @@
+From 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:59 +0000
+Subject: KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream.
+
+nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails
+with -EINVAL. But it could also fail with -EFAULT if creating a host
+mapping failed. Inject a #GP in all cases, no reason to treat failure
+modes differently.
+
+Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1021,12 +1021,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+       }
+       vmcb12_gpa = svm->vmcb->save.rax;
+-      ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+-      if (ret == -EINVAL) {
++      if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+-      } else if (ret) {
+-              return kvm_skip_emulated_instruction(vcpu);
+       }
+       ret = kvm_skip_emulated_instruction(vcpu);
diff --git a/queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch b/queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
new file mode 100644 (file)
index 0000000..9ef1a4f
--- /dev/null
@@ -0,0 +1,65 @@
+From 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 3 Mar 2026 16:22:23 -0800
+Subject: KVM: nSVM: Always intercept VMMCALL when L2 is active
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 upstream.
+
+Always intercept VMMCALL now that KVM properly synthesizes a #UD as
+appropriate, i.e. when L1 doesn't want to intercept VMMCALL, to avoid
+putting L2 into an infinite #UD loop if KVM_X86_QUIRK_FIX_HYPERCALL_INSN
+is enabled.
+
+By letting L2 execute VMMCALL natively and thus #UD, for all intents and
+purposes KVM morphs the VMMCALL intercept into a #UD intercept (KVM always
+intercepts #UD).  When the hypercall quirk is enabled, KVM "emulates"
+VMMCALL in response to the #UD by trying to fixup the opcode to the "right"
+vendor, then restarts the guest, without skipping the VMMCALL.  As a
+result, the guest sees an endless stream of #UDs since it's already
+executing the correct vendor hypercall instruction, i.e. the emulator
+doesn't anticipate that the #UD could be due to lack of interception, as
+opposed to a truly undefined opcode.
+
+Fixes: 0d945bd93511 ("KVM: SVM: Don't allow nested guest to VMMCALL into host")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/hyperv.h |    4 ----
+ arch/x86/kvm/svm/nested.c |    7 -------
+ 2 files changed, 11 deletions(-)
+
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -51,10 +51,6 @@ static inline bool nested_svm_is_l2_tlb_
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+-static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+ static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+ {
+       return false;
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -158,13 +158,6 @@ void recalc_intercepts(struct vcpu_svm *
+                       vmcb_clr_intercept(c, INTERCEPT_VINTR);
+       }
+-      /*
+-       * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
+-       * flush feature is enabled.
+-       */
+-      if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
+-              vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
+-
+       for (i = 0; i < MAX_INTERCEPT; i++)
+               c->intercepts[i] |= g->intercepts[i];
diff --git a/queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch b/queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
new file mode 100644 (file)
index 0000000..f7d04a3
--- /dev/null
@@ -0,0 +1,76 @@
+From 8d397582f6b5e9fbcf09781c7c934b4910e94a50 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:47 +0000
+Subject: KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8d397582f6b5e9fbcf09781c7c934b4910e94a50 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt, instead it advances the current RIP
+before running it. KVM uses the current RIP as the NextRIP in vmcb02 to
+emulate a CPU without NRIPS.
+
+However, after L2 runs the first time, NextRIP will be updated by the CPU
+and/or KVM, and the current RIP is no longer the correct value to use in
+vmcb02.  Hence, after save/restore, use the current RIP if and only if a
+nested run is pending, otherwise use NextRIP.  Give soft_int_next_rip the
+same treatment, as it's the same logic, just for a narrower use case.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-6-yosry@kernel.org
+[sean: give soft_int_next_rip the same treatment]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -856,24 +856,32 @@ static void nested_vmcb02_prepare_contro
+       vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
+       /*
+-       * next_rip is consumed on VMRUN as the return address pushed on the
++       * NextRIP is consumed on VMRUN as the return address pushed on the
+        * stack for injected soft exceptions/interrupts.  If nrips is exposed
+-       * to L1, take it verbatim from vmcb12.  If nrips is supported in
+-       * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
+-       * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
+-       * prior to injecting the event).
++       * to L1, take it verbatim from vmcb12.
++       *
++       * If nrips is supported in hardware but not exposed to L1, stuff the
++       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++       * responsible for advancing RIP prior to injecting the event). This is
++       * only the case for the first L2 run after VMRUN. After that (e.g.
++       * during save/restore), NextRIP is updated by the CPU and/or KVM, and
++       * the value of the L2 RIP from vmcb12 should not be used.
+        */
+-      if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+-              vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
+-      else if (boot_cpu_has(X86_FEATURE_NRIPS))
+-              vmcb02->control.next_rip    = vmcb12_rip;
++      if (boot_cpu_has(X86_FEATURE_NRIPS)) {
++              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++                  !svm->nested.nested_run_pending)
++                      vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
++              else
++                      vmcb02->control.next_rip    = vmcb12_rip;
++      }
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+               svm->soft_int_injected = true;
+               svm->soft_int_csbase = vmcb12_csbase;
+               svm->soft_int_old_rip = vmcb12_rip;
+-              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++                  !svm->nested.nested_run_pending)
+                       svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+               else
+                       svm->soft_int_next_rip = vmcb12_rip;
diff --git a/queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch b/queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
new file mode 100644 (file)
index 0000000..4281118
--- /dev/null
@@ -0,0 +1,65 @@
+From b53ab5167a81537777ac780bbd93d32613aa3bda Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:55 +0000
+Subject: KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b53ab5167a81537777ac780bbd93d32613aa3bda upstream.
+
+svm_copy_lbrs() always marks VMCB_LBR dirty in the destination VMCB.
+However, nested_svm_vmexit() uses it to copy LBRs to vmcb12, and
+clearing clean bits in vmcb12 is not architecturally defined.
+
+Move vmcb_mark_dirty() to callers and drop it for vmcb12.
+
+This also facilitates incoming refactoring that does not pass the entire
+VMCB to svm_copy_lbrs().
+
+Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    7 +++++--
+ arch/x86/kvm/svm/svm.c    |    2 --
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -726,6 +726,7 @@ static void nested_vmcb02_prepare_save(s
+       } else {
+               svm_copy_lbrs(vmcb02, vmcb01);
+       }
++      vmcb_mark_dirty(vmcb02, VMCB_LBR);
+       svm_update_lbrv(&svm->vcpu);
+ }
+@@ -1242,10 +1243,12 @@ int nested_svm_vmexit(struct vcpu_svm *s
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+       if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
++                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+               svm_copy_lbrs(vmcb12, vmcb02);
+-      else
++      } else {
+               svm_copy_lbrs(vmcb01, vmcb02);
++              vmcb_mark_dirty(vmcb01, VMCB_LBR);
++      }
+       svm_update_lbrv(vcpu);
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -848,8 +848,6 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+       to_vmcb->save.br_to             = from_vmcb->save.br_to;
+       to_vmcb->save.last_excp_from    = from_vmcb->save.last_excp_from;
+       to_vmcb->save.last_excp_to      = from_vmcb->save.last_excp_to;
+-
+-      vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
diff --git a/queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..59a5f69
--- /dev/null
@@ -0,0 +1,69 @@
+From 69b721a86d0dcb026f6db7d111dcde7550442d2e Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:05 +0000
+Subject: KVM: nSVM: Clear EVENTINJ fields in vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 69b721a86d0dcb026f6db7d111dcde7550442d2e upstream.
+
+According to the APM, from the reference of the VMRUN instruction:
+
+  Upon #VMEXIT, the processor performs the following actions in order to
+  return to the host execution context:
+
+  ...
+
+  clear EVENTINJ field in VMCB
+
+KVM already syncs EVENTINJ fields from vmcb02 to cached vmcb12 on every
+L2->L0  #VMEXIT. Since these fields are zeroed by the CPU on #VMEXIT, they
+will mostly be zeroed in vmcb12 on nested #VMEXIT by nested_svm_vmexit().
+
+However, this is not the case when:
+
+  1. Consistency checks fail, as nested_svm_vmexit() is not called.
+  2. Entering guest mode fails before L2 runs (e.g. due to failed load of
+     CR3).
+
+(2) was broken by commit 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB
+controls updated by the processor on every vmexit"), as prior to that
+nested_svm_vmexit() always zeroed EVENTINJ fields.
+
+Explicitly clear the fields in all nested #VMEXIT code paths.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Fixes: 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-12-yosry@kernel.org
+[sean: massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1045,6 +1045,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+               vmcb12->control.exit_code    = SVM_EXIT_ERR;
+               vmcb12->control.exit_info_1  = 0;
+               vmcb12->control.exit_info_2  = 0;
++              vmcb12->control.event_inj = 0;
++              vmcb12->control.event_inj_err = 0;
+               svm_set_gif(svm, false);
+               goto out;
+       }
+@@ -1188,9 +1190,9 @@ static int nested_svm_vmexit_update_vmcb
+       if (nested_vmcb12_has_lbrv(vcpu))
+               svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
++      vmcb12->control.event_inj         = 0;
++      vmcb12->control.event_inj_err     = 0;
+       vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
+-      vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
+-      vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
+       trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+                                      vmcb12->control.exit_info_1,
diff --git a/queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch b/queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
new file mode 100644 (file)
index 0000000..462c166
--- /dev/null
@@ -0,0 +1,33 @@
+From f85a6ce06e4a0d49652f57967a649ab09e06287c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:04 +0000
+Subject: KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID)
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream.
+
+According to the APM, GIF is set to 0 on any #VMEXIT, including
+an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on
+consistency check failures.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1045,6 +1045,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+               vmcb12->control.exit_code    = SVM_EXIT_ERR;
+               vmcb12->control.exit_info_1  = 0;
+               vmcb12->control.exit_info_2  = 0;
++              svm_set_gif(svm, false);
+               goto out;
+       }
diff --git a/queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..3e71af0
--- /dev/null
@@ -0,0 +1,64 @@
+From 8998e1d012f3f45d0456f16706682cef04c3c436 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:06 +0000
+Subject: KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream.
+
+KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft
+IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to
+failed VMRUN. However, they are not explicitly cleared in other
+synthesized #VMEXITs.
+
+soft_int_injected is always cleared after the first VMRUN of L2 when
+completing interrupts, as any re-injection is then tracked by KVM
+(instead of purely in vmcb02).
+
+nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection
+failed, as KVM still needs to keep track that the NMI originated from L1
+to avoid blocking NMIs for L1. It is only cleared when the NMI injection
+succeeds.
+
+KVM could synthesize a #VMEXIT to L1 before successfully injecting the
+NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In
+this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask
+NMIs and intercept IRET when injecting an NMI into L1.
+
+Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e.
+for all #VMEXITs except those that occur due to failed consistency checks,
+as those happen before nmi_l1_to_l2 or soft_int_injected are set.
+
+Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1074,8 +1074,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ out_exit_err:
+       svm->nested.nested_run_pending = 0;
+-      svm->nmi_l1_to_l2 = false;
+-      svm->soft_int_injected = false;
+       svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+       svm->vmcb->control.exit_info_1  = 0;
+@@ -1331,6 +1329,10 @@ void nested_svm_vmexit(struct vcpu_svm *
+       if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
+               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
++      /* Drop tracking for L1->L2 injected NMIs and soft IRQs */
++      svm->nmi_l1_to_l2 = false;
++      svm->soft_int_injected = false;
++
+       /*
+        * Drop what we picked up for L2 via svm_complete_interrupts() so it
+        * doesn't end up in L1.
diff --git a/queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch b/queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
new file mode 100644 (file)
index 0000000..1bef050
--- /dev/null
@@ -0,0 +1,139 @@
+From c64bc6ed1764c1b7e3c0017019f743196074092f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 4 Mar 2026 16:06:56 -0800
+Subject: KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c64bc6ed1764c1b7e3c0017019f743196074092f upstream.
+
+In the save+restore path, when restoring nested state, the values of RIP
+and CS base passed into nested_vmcb02_prepare_control() are mostly
+incorrect.  They are both pulled from the vmcb02. For CS base, the value
+is only correct if system regs are restored before nested state. The
+value of RIP is whatever the vCPU had in vmcb02 before restoring nested
+state (zero on a freshly created vCPU).
+
+Instead, take a similar approach to NextRIP, and delay initializing the
+RIP tracking fields until shortly before the vCPU is run, to make sure
+the most up-to-date values of RIP and CS base are used regardless of
+KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative
+ordering.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-8-yosry@kernel.org
+[sean: deal with the svm_cancel_injection() madness]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   17 ++++++++---------
+ arch/x86/kvm/svm/svm.c    |   29 +++++++++++++++++++++++++++++
+ 2 files changed, 37 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -754,9 +754,7 @@ static bool is_evtinj_nmi(u32 evtinj)
+       return type == SVM_EVTINJ_TYPE_NMI;
+ }
+-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
+-                                        unsigned long vmcb12_rip,
+-                                        unsigned long vmcb12_csbase)
++static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
+ {
+       u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
+       u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+@@ -868,15 +866,16 @@ static void nested_vmcb02_prepare_contro
+               vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
++
++      /*
++       * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1
++       * doesn't have NRIPS) are initialized later, before the vCPU is run.
++       */
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+               svm->soft_int_injected = true;
+-              svm->soft_int_csbase = vmcb12_csbase;
+-              svm->soft_int_old_rip = vmcb12_rip;
+               if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+                   !svm->nested.nested_run_pending)
+                       svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+-              else
+-                      svm->soft_int_next_rip = vmcb12_rip;
+       }
+       /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+@@ -974,7 +973,7 @@ int enter_svm_guest_mode(struct kvm_vcpu
+       nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
+       svm_switch_vmcb(svm, &svm->nested.vmcb02);
+-      nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
++      nested_vmcb02_prepare_control(svm);
+       nested_vmcb02_prepare_save(svm, vmcb12);
+       ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
+@@ -1920,7 +1919,7 @@ static int svm_set_nested_state(struct k
+       nested_copy_vmcb_control_to_cache(svm, ctl);
+       svm_switch_vmcb(svm, &svm->nested.vmcb02);
+-      nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
++      nested_vmcb02_prepare_control(svm);
+       /*
+        * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3639,6 +3639,16 @@ static int svm_handle_exit(struct kvm_vc
+       return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
+ }
++static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++
++      svm->soft_int_csbase = svm->vmcb->save.cs.base;
++      svm->soft_int_old_rip = kvm_rip_read(vcpu);
++      if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              svm->soft_int_next_rip = kvm_rip_read(vcpu);
++}
++
+ static int pre_svm_run(struct kvm_vcpu *vcpu)
+ {
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+@@ -3761,6 +3771,13 @@ static void svm_fixup_nested_rips(struct
+       if (boot_cpu_has(X86_FEATURE_NRIPS) &&
+           !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+               svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++
++      /*
++       * Simiarly, initialize the soft int metadata here to use the most
++       * up-to-date values of RIP and CS base, regardless of restore order.
++       */
++      if (svm->soft_int_injected)
++              svm_set_nested_run_soft_int_state(vcpu);
+ }
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+@@ -4131,6 +4148,18 @@ static void svm_complete_soft_interrupt(
+       struct vcpu_svm *svm = to_svm(vcpu);
+       /*
++       * Initialize the soft int fields *before* reading them below if KVM
++       * aborted entry to the guest with a nested VMRUN pending.  To ensure
++       * KVM uses up-to-date values for RIP and CS base across save/restore,
++       * regardless of restore order, KVM waits to set the soft int fields
++       * until VMRUN is imminent.  But when canceling injection, KVM requeues
++       * the soft int and will reinject it via the standard injection flow,
++       * and so KVM needs to grab the state from the pending nested VMRUN.
++       */
++      if (is_guest_mode(vcpu) && svm->nested.nested_run_pending)
++              svm_set_nested_run_soft_int_state(vcpu);
++
++      /*
+        * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's
+        * associated with the original soft exception/interrupt.  next_rip is
+        * cleared on all exits that can occur while vectoring an event, so KVM
diff --git a/queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch b/queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
new file mode 100644 (file)
index 0000000..d053d10
--- /dev/null
@@ -0,0 +1,117 @@
+From a0592461f39c00b28f552fe842a063a00043eaa8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:48 +0000
+Subject: KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit a0592461f39c00b28f552fe842a063a00043eaa8 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt, instead it advances L2's RIP
+before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to
+emulate a CPU without NRIPS.
+
+However, in svm_set_nested_state(), the value used for L2's current RIP
+comes from vmcb02, which is just whatever the vCPU had in vmcb02 before
+restoring nested state (zero on a freshly created vCPU). Passing the
+cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue
+if registers are restored before nested state.
+
+Instead, split the logic of setting NextRIP in vmcb02. Handle the
+'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12
+(or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control().
+Delay the special case of stuffing L2's current RIP into vmcb02's
+NextRIP until shortly before the vCPU is run, to make sure the most
+up-to-date value of RIP is used regardless of KVM_SET_REGS and
+KVM_SET_NESTED_STATE's relative ordering.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-7-yosry@kernel.org
+[sean: use new helper, svm_fixup_nested_rips()]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   25 ++++++++-----------------
+ arch/x86/kvm/svm/svm.c    |   25 +++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -856,24 +856,15 @@ static void nested_vmcb02_prepare_contro
+       vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
+       /*
+-       * NextRIP is consumed on VMRUN as the return address pushed on the
+-       * stack for injected soft exceptions/interrupts.  If nrips is exposed
+-       * to L1, take it verbatim from vmcb12.
+-       *
+-       * If nrips is supported in hardware but not exposed to L1, stuff the
+-       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
+-       * responsible for advancing RIP prior to injecting the event). This is
+-       * only the case for the first L2 run after VMRUN. After that (e.g.
+-       * during save/restore), NextRIP is updated by the CPU and/or KVM, and
+-       * the value of the L2 RIP from vmcb12 should not be used.
++       * If nrips is exposed to L1, take NextRIP as-is.  Otherwise, L1
++       * advances L2's RIP before VMRUN instead of using NextRIP. KVM will
++       * stuff the current RIP as vmcb02's NextRIP before L2 is run.  After
++       * the first run of L2 (e.g. after save+restore), NextRIP is updated by
++       * the CPU and/or KVM and should be used regardless of L1's support.
+        */
+-      if (boot_cpu_has(X86_FEATURE_NRIPS)) {
+-              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+-                  !svm->nested.nested_run_pending)
+-                      vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
+-              else
+-                      vmcb02->control.next_rip    = vmcb12_rip;
+-      }
++      if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++          !svm->nested.nested_run_pending)
++              vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3742,6 +3742,29 @@ static void svm_inject_irq(struct kvm_vc
+       svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
+ }
++static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++
++      if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending)
++              return;
++
++      /*
++       * If nrips is supported in hardware but not exposed to L1, stuff the
++       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++       * responsible for advancing RIP prior to injecting the event). Once L2
++       * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or
++       * KVM, and this is no longer needed.
++       *
++       * This is done here (as opposed to when preparing vmcb02) to use the
++       * most up-to-date value of RIP regardless of the order of restoring
++       * registers and nested state in the vCPU save+restore path.
++       */
++      if (boot_cpu_has(X86_FEATURE_NRIPS) &&
++          !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++}
++
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+                                    int trig_mode, int vector)
+ {
+@@ -4338,6 +4361,8 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+           kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
+               svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
++      svm_fixup_nested_rips(vcpu);
++
+       svm_hv_update_vp_id(svm->vmcb, vcpu);
+       /*
diff --git a/queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch b/queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
new file mode 100644 (file)
index 0000000..3d93321
--- /dev/null
@@ -0,0 +1,67 @@
+From e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:08 +0000
+Subject: KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 upstream.
+
+KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka
+SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs.
+On first glance, it seems like the check should actually be for
+guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
+host to support NPTs but the guest CPUID to not advertise it.
+
+However, the consistency check is not architectural to begin with. The
+APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
+that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
+if X86_FEATURE_NPT is not available for L1, so sanitize it when copying
+from the VMCB12 to KVM's cache.
+
+Apart from the consistency check, NP_ENABLE in VMCB12 is currently
+ignored because the bit is actually copied from VMCB01 to VMCB02, not
+from VMCB12.
+
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-15-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -350,9 +350,6 @@ static bool __nested_vmcb_check_controls
+       if (CC(control->asid == 0))
+               return false;
+-      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+-              return false;
+-
+       if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+                                          MSRPM_SIZE)))
+               return false;
+@@ -462,6 +459,11 @@ void __nested_copy_vmcb_control_to_cache
+       nested_svm_sanitize_intercept(vcpu, to, SKINIT);
+       nested_svm_sanitize_intercept(vcpu, to, RDPRU);
++      /* Always clear SVM_NESTED_CTL_NP_ENABLE if the guest cannot use NPTs */
++      to->nested_ctl          = from->nested_ctl;
++      if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT))
++              to->nested_ctl &= ~SVM_NESTED_CTL_NP_ENABLE;
++
+       to->iopm_base_pa        = from->iopm_base_pa;
+       to->msrpm_base_pa       = from->msrpm_base_pa;
+       to->tsc_offset          = from->tsc_offset;
+@@ -475,7 +477,6 @@ void __nested_copy_vmcb_control_to_cache
+       to->exit_info_2         = from->exit_info_2;
+       to->exit_int_info       = from->exit_int_info;
+       to->exit_int_info_err   = from->exit_int_info_err;
+-      to->nested_ctl          = from->nested_ctl;
+       to->event_inj           = from->event_inj;
+       to->event_inj_err       = from->event_inj_err;
+       to->next_rip            = from->next_rip;
diff --git a/queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch b/queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
new file mode 100644 (file)
index 0000000..896ae7d
--- /dev/null
@@ -0,0 +1,42 @@
+From 24f7d36b824b65cf1a2db3db478059187b2a37b0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 24 Feb 2026 22:50:17 +0000
+Subject: KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream.
+
+On nested VMRUN, KVM ensures AVIC is inhibited by requesting
+KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding
+APICV_INHIBIT_REASON_NESTED, and disabling AVIC.
+
+However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest
+mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC
+is not inhibited.
+
+Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is
+active, similar to the nested VMRUN path.
+
+Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1939,6 +1939,9 @@ static int svm_set_nested_state(struct k
+       svm->nested.force_msr_bitmap_recalc = true;
++      if (kvm_vcpu_apicv_active(vcpu))
++              kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++
+       kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+       ret = 0;
+ out_free:
diff --git a/queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch b/queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
new file mode 100644 (file)
index 0000000..7bbc640
--- /dev/null
@@ -0,0 +1,42 @@
+From e63fb1379f4b9300a44739964e69549bebbcdca4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 10 Feb 2026 01:08:06 +0000
+Subject: KVM: nSVM: Mark all of vmcb02 dirty when restoring nested state
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream.
+
+When restoring a vCPU in guest mode, any state restored before
+KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding
+dirty bits in vmcb01, as it is the active VMCB before switching to
+vmcb02 in svm_set_nested_state().
+
+Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to
+capture any previously restored fields.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1918,6 +1918,12 @@ static int svm_set_nested_state(struct k
+       nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
+       /*
++       * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
++       * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
++       */
++      vmcb_mark_all_dirty(svm->vmcb);
++
++      /*
+        * While the nested guest CR3 is already checked and set by
+        * KVM_SET_SREGS, it was set when nested state was yet loaded,
+        * thus MMU might not be initialized correctly.
diff --git a/queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch b/queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
new file mode 100644 (file)
index 0000000..f3b6d40
--- /dev/null
@@ -0,0 +1,141 @@
+From c36991c6f8d2ab56ee67aff04e3c357f45cfc76c Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Tue, 3 Mar 2026 16:22:22 -0800
+Subject: KVM: nSVM: Raise #UD if unhandled VMMCALL isn't intercepted by L1
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit c36991c6f8d2ab56ee67aff04e3c357f45cfc76c upstream.
+
+Explicitly synthesize a #UD for VMMCALL if L2 is active, L1 does NOT want
+to intercept VMMCALL, nested_svm_l2_tlb_flush_enabled() is true, and the
+hypercall is something other than one of the supported Hyper-V hypercalls.
+When all of the above conditions are met, KVM will intercept VMMCALL but
+never forward it to L1, i.e. will let L2 make hypercalls as if it were L1.
+
+The TLFS says a whole lot of nothing about this scenario, so go with the
+architectural behavior, which says that VMMCALL #UDs if it's not
+intercepted.
+
+Opportunistically do a 2-for-1 stub trade by stub-ifying the new API
+instead of the helpers it uses.  The last remaining "single" stub will
+soon be dropped as well.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Fixes: 3f4a812edf5c ("KVM: nSVM: hyper-v: Enable L2 TLB flush")
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Link: https://patch.msgid.link/20260228033328.2285047-5-chengkev@google.com
+[sean: rewrite changelog and comment, tag for stable, remove defunct stubs]
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.h     |    8 --------
+ arch/x86/kvm/svm/hyperv.h |   11 +++++++++++
+ arch/x86/kvm/svm/nested.c |    4 +---
+ arch/x86/kvm/svm/svm.c    |   19 ++++++++++++++++++-
+ 4 files changed, 30 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.h
++++ b/arch/x86/kvm/hyperv.h
+@@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pen
+ {
+       return false;
+ }
+-static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+-static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+ static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flu
+       return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
+ }
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++      return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
++             nested_svm_l2_tlb_flush_enabled(vcpu) &&
++             kvm_hv_is_tlb_flush_hcall(vcpu);
++}
++
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+@@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flu
+ {
+       return false;
+ }
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++      return false;
++}
+ static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {}
+ #endif /* CONFIG_KVM_HYPERV */
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1711,9 +1711,7 @@ int nested_svm_exit_special(struct vcpu_
+       }
+       case SVM_EXIT_VMMCALL:
+               /* Hyper-V L2 TLB flush hypercall is handled by L0 */
+-              if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+-                  nested_svm_l2_tlb_flush_enabled(vcpu) &&
+-                  kvm_hv_is_tlb_flush_hcall(vcpu))
++              if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
+                       return NESTED_EXIT_HOST;
+               break;
+       default:
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -52,6 +52,7 @@
+ #include "svm.h"
+ #include "svm_ops.h"
++#include "hyperv.h"
+ #include "kvm_onhyperv.h"
+ #include "svm_onhyperv.h"
+@@ -3249,6 +3250,22 @@ static int bus_lock_exit(struct kvm_vcpu
+       return 0;
+ }
++static int vmmcall_interception(struct kvm_vcpu *vcpu)
++{
++      /*
++       * Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB
++       * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is
++       * reachable if and only if L1 doesn't want to intercept VMMCALL or has
++       * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls.
++       */
++      if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) {
++              kvm_queue_exception(vcpu, UD_VECTOR);
++              return 1;
++      }
++
++      return kvm_emulate_hypercall(vcpu);
++}
++
+ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+       [SVM_EXIT_READ_CR0]                     = cr_interception,
+       [SVM_EXIT_READ_CR3]                     = cr_interception,
+@@ -3299,7 +3316,7 @@ static int (*const svm_exit_handlers[])(
+       [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
+       [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
+       [SVM_EXIT_VMRUN]                        = vmrun_interception,
+-      [SVM_EXIT_VMMCALL]                      = kvm_emulate_hypercall,
++      [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
+       [SVM_EXIT_VMLOAD]                       = vmload_interception,
+       [SVM_EXIT_VMSAVE]                       = vmsave_interception,
+       [SVM_EXIT_STGI]                         = stgi_interception,
diff --git a/queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch b/queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
new file mode 100644 (file)
index 0000000..c81f4d7
--- /dev/null
@@ -0,0 +1,58 @@
+From 290c8d82023ab0e1d2782d37136541e017174d7c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:00 +0000
+Subject: KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 290c8d82023ab0e1d2782d37136541e017174d7c upstream.
+
+Refactor the vCPU cap and vmcb12 flag checks into a helper. The
+unlikely() annotation is dropped, it's unlikely (huh) to make a
+difference and the CPU will probably predict it better on its own.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-7-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -651,6 +651,12 @@ void nested_vmcb02_compute_g_pat(struct
+       svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+ }
++static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
++{
++      return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
++              (to_svm(vcpu)->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
++}
++
+ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ {
+       bool new_vmcb12 = false;
+@@ -715,8 +721,7 @@ static void nested_vmcb02_prepare_save(s
+               vmcb_mark_dirty(vmcb02, VMCB_DR);
+       }
+-      if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++      if (nested_vmcb12_has_lbrv(vcpu)) {
+               /*
+                * Reserved bits of DEBUGCTL are ignored.  Be consistent with
+                * svm_set_msr's definition of reserved bits.
+@@ -1243,8 +1248,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (!nested_exit_on_intr(svm))
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+-      if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++      if (nested_vmcb12_has_lbrv(vcpu)) {
+               svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+       } else {
+               svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
diff --git a/queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch b/queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
new file mode 100644 (file)
index 0000000..a2aa0e8
--- /dev/null
@@ -0,0 +1,142 @@
+From dcf3648ab71437b504abbfdc4e74622a0f1a56e3 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:01 +0000
+Subject: KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit dcf3648ab71437b504abbfdc4e74622a0f1a56e3 upstream.
+
+Move mapping vmcb12 and updating it out of nested_svm_vmexit() into a
+helper, no functional change intended.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-8-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   77 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 44 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1134,36 +1134,20 @@ void svm_copy_vmloadsave_state(struct vm
+       to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+ }
+-int nested_svm_vmexit(struct vcpu_svm *svm)
++static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_vcpu *vcpu = &svm->vcpu;
+-      struct vmcb *vmcb01 = svm->vmcb01.ptr;
++      struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+-      struct vmcb *vmcb12;
+       struct kvm_host_map map;
++      struct vmcb *vmcb12;
+       int rc;
+       rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+-      if (rc) {
+-              if (rc == -EINVAL)
+-                      kvm_inject_gp(vcpu, 0);
+-              return 1;
+-      }
++      if (rc)
++              return rc;
+       vmcb12 = map.hva;
+-      /* Exit Guest-Mode */
+-      leave_guest_mode(vcpu);
+-      svm->nested.vmcb12_gpa = 0;
+-      WARN_ON_ONCE(svm->nested.nested_run_pending);
+-
+-      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+-
+-      /* in case we halted in L2 */
+-      kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
+-
+-      /* Give the current vmcb to the guest */
+-
+       vmcb12->save.es     = vmcb02->save.es;
+       vmcb12->save.cs     = vmcb02->save.cs;
+       vmcb12->save.ss     = vmcb02->save.ss;
+@@ -1200,10 +1184,48 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+               vmcb12->control.next_rip  = vmcb02->control.next_rip;
++      if (nested_vmcb12_has_lbrv(vcpu))
++              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
++
+       vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
+       vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
+       vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
++      trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
++                                     vmcb12->control.exit_info_1,
++                                     vmcb12->control.exit_info_2,
++                                     vmcb12->control.exit_int_info,
++                                     vmcb12->control.exit_int_info_err,
++                                     KVM_ISA_SVM);
++
++      kvm_vcpu_unmap(vcpu, &map);
++      return 0;
++}
++
++int nested_svm_vmexit(struct vcpu_svm *svm)
++{
++      struct kvm_vcpu *vcpu = &svm->vcpu;
++      struct vmcb *vmcb01 = svm->vmcb01.ptr;
++      struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
++      int rc;
++
++      rc = nested_svm_vmexit_update_vmcb12(vcpu);
++      if (rc) {
++              if (rc == -EINVAL)
++                      kvm_inject_gp(vcpu, 0);
++              return 1;
++      }
++
++      /* Exit Guest-Mode */
++      leave_guest_mode(vcpu);
++      svm->nested.vmcb12_gpa = 0;
++      WARN_ON_ONCE(svm->nested.nested_run_pending);
++
++      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
++
++      /* in case we halted in L2 */
++      kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
++
+       if (!kvm_pause_in_guest(vcpu->kvm)) {
+               vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+               vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+@@ -1248,9 +1270,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (!nested_exit_on_intr(svm))
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+-      if (nested_vmcb12_has_lbrv(vcpu)) {
+-              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+-      } else {
++      if (!nested_vmcb12_has_lbrv(vcpu)) {
+               svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+               vmcb_mark_dirty(vmcb01, VMCB_LBR);
+       }
+@@ -1306,15 +1326,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       svm->vcpu.arch.dr7 = DR7_FIXED_1;
+       kvm_update_dr7(&svm->vcpu);
+-      trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+-                                     vmcb12->control.exit_info_1,
+-                                     vmcb12->control.exit_info_2,
+-                                     vmcb12->control.exit_int_info,
+-                                     vmcb12->control.exit_int_info_err,
+-                                     KVM_ISA_SVM);
+-
+-      kvm_vcpu_unmap(vcpu, &map);
+-
+       nested_svm_transition_tlb_flush(vcpu);
+       nested_svm_uninit_mmu_context(vcpu);
diff --git a/queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
new file mode 100644 (file)
index 0000000..e436ccd
--- /dev/null
@@ -0,0 +1,52 @@
+From 03bee264f8ebfd39e0254c98e112d033a7aa9055 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:44 +0000
+Subject: KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+int_state is also written by the CPU, specifically bit 0 (i.e.
+SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to
+cached vmcb12. This does not cause a problem if KVM_SET_NESTED_STATE
+precedes KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow
+would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites
+what KVM_SET_NESTED_STATE restored in int_state).
+
+However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an
+interrupt shadow would be restored into vmcb01 instead of vmcb02. This
+would mostly be benign for L1 (delays an interrupt), but not for L2. For
+L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before
+a HLT that should have been in an interrupt shadow).
+
+Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02()
+to avoid this problem. With that, KVM_SET_NESTED_STATE restores the
+correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it
+would overwrite it with the same value.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -530,6 +530,7 @@ void nested_sync_control_from_vmcb02(str
+       u32 mask;
+       svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
+       svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
++      svm->nested.ctl.int_state       = svm->vmcb->control.int_state;
+       /* Only a few fields of int_ctl are written by the processor.  */
+       mask = V_IRQ_MASK | V_TPR_MASK;
diff --git a/queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
new file mode 100644 (file)
index 0000000..f8edd13
--- /dev/null
@@ -0,0 +1,55 @@
+From 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:43 +0000
+Subject: KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+NextRIP is also written by the CPU (in some cases) after VMRUN, but is
+not sync'd to the cached vmcb12. As a result, it is corrupted after
+save/restore (replaced by the original value written by L1 on nested
+VMRUN). This could cause problems for both KVM (e.g. when injecting a
+soft IRQ) or L1 (e.g. when using NextRIP to advance RIP after emulating
+an instruction).
+
+Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only
+after completing interrupts (not in nested_sync_control_from_vmcb02()),
+as KVM may update NextRIP (e.g. when re-injecting a soft IRQ).
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4436,6 +4436,16 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+       svm_complete_interrupts(vcpu);
++      /*
++       * Update the cache after completing interrupts to get an accurate
++       * NextRIP, e.g. when re-injecting a soft interrupt.
++       *
++       * FIXME: Rework svm_get_nested_state() to not pull data from the
++       *        cache (except for maybe int_ctl).
++       */
++      if (is_guest_mode(vcpu))
++              svm->nested.ctl.next_rip = svm->vmcb->control.next_rip;
++
+       return svm_exit_handlers_fastpath(vcpu);
+ }
diff --git a/queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..7b399b6
--- /dev/null
@@ -0,0 +1,55 @@
+From 1b30e7551767cb95b3e49bb169c72bbd76b56e05 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:02 +0000
+Subject: KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 1b30e7551767cb95b3e49bb169c72bbd76b56e05 upstream.
+
+KVM currently injects a #GP and hopes for the best if mapping VMCB12
+fails on nested #VMEXIT, and only if the failure mode is -EINVAL.
+Mapping the VMCB12 could also fail if creating host mappings fails.
+
+After the #GP is injected, nested_svm_vmexit() bails early, without
+cleaning up (e.g. KVM_REQ_GET_NESTED_STATE_PAGES is set, is_guest_mode()
+is true, etc).
+
+Instead of optionally injecting a #GP, triple fault the guest if mapping
+VMCB12 fails since KVM cannot make a sane recovery. The APM states that
+a #VMEXIT will triple fault if host state is illegal or an exception
+occurs while loading host state, so the behavior is not entirely made
+up.
+
+Do not return early from nested_svm_vmexit(), continue cleaning up the
+vCPU state (e.g. switch back to vmcb01), to handle the failure as
+gracefully as possible.
+
+Fixes: cf74a78b229d ("KVM: SVM: Add VMEXIT handler and intercepts")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-9-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1208,12 +1208,8 @@ void nested_svm_vmexit(struct vcpu_svm *
+       struct vmcb *vmcb01 = svm->vmcb01.ptr;
+       struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+-      rc = nested_svm_vmexit_update_vmcb12(vcpu);
+-      if (rc) {
+-              if (rc == -EINVAL)
+-                      kvm_inject_gp(vcpu, 0);
+-              return 1;
+-      }
++      if (nested_svm_vmexit_update_vmcb12(vcpu))
++              kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       /* Exit Guest-Mode */
+       leave_guest_mode(vcpu);
diff --git a/queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..49beb53
--- /dev/null
@@ -0,0 +1,137 @@
+From 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:03 +0000
+Subject: KVM: nSVM: Triple fault if restore host CR3 fails on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 upstream.
+
+If loading L1's CR3 fails on a nested #VMEXIT, nested_svm_vmexit()
+returns an error code that is ignored by most callers, and continues to
+run L1 with corrupted state. A sane recovery is not possible in this
+case, and HW behavior is to cause a shutdown. Inject a triple fault
+instead, and do not return early from nested_svm_vmexit(). Continue
+cleaning up the vCPU state (e.g. clear pending exceptions), to handle
+the failure as gracefully as possible.
+
+From the APM:
+
+  Upon #VMEXIT, the processor performs the following actions in order to
+  return to the host execution context:
+
+  ...
+
+  if (illegal host state loaded, or exception while loading host state)
+      shutdown
+  else
+      execute first host instruction following the VMRUN
+
+Remove the return value of nested_svm_vmexit(), which is mostly
+unchecked anyway.
+
+Fixes: d82aaef9c88a ("KVM: nSVM: use nested_svm_load_cr3() on guest->host switch")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-10-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   10 +++-------
+ arch/x86/kvm/svm/svm.c    |   11 ++---------
+ arch/x86/kvm/svm/svm.h    |    6 +++---
+ 3 files changed, 8 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1202,12 +1202,11 @@ static int nested_svm_vmexit_update_vmcb
+       return 0;
+ }
+-int nested_svm_vmexit(struct vcpu_svm *svm)
++void nested_svm_vmexit(struct vcpu_svm *svm)
+ {
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct vmcb *vmcb01 = svm->vmcb01.ptr;
+       struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+-      int rc;
+       rc = nested_svm_vmexit_update_vmcb12(vcpu);
+       if (rc) {
+@@ -1330,9 +1329,8 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       nested_svm_uninit_mmu_context(vcpu);
+-      rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
+-      if (rc)
+-              return 1;
++      if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
++              kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       /*
+        * Drop what we picked up for L2 via svm_complete_interrupts() so it
+@@ -1357,8 +1355,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+        */
+       if (kvm_apicv_activated(vcpu->kvm))
+               __kvm_vcpu_update_apicv(vcpu);
+-
+-      return 0;
+ }
+ static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2233,13 +2233,9 @@ static int emulate_svm_instr(struct kvm_
+               [SVM_INSTR_VMSAVE] = vmsave_interception,
+       };
+       struct vcpu_svm *svm = to_svm(vcpu);
+-      int ret;
+       if (is_guest_mode(vcpu)) {
+-              /* Returns '1' or -errno on failure, '0' on success. */
+-              ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
+-              if (ret)
+-                      return ret;
++              nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
+               return 1;
+       }
+       return svm_instr_handlers[opcode](vcpu);
+@@ -4872,7 +4868,6 @@ static int svm_enter_smm(struct kvm_vcpu
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_host_map map_save;
+-      int ret;
+       if (!is_guest_mode(vcpu))
+               return 0;
+@@ -4892,9 +4887,7 @@ static int svm_enter_smm(struct kvm_vcpu
+       svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+       svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+-      ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
+-      if (ret)
+-              return ret;
++      nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
+       /*
+        * KVM uses VMCB01 to store L1 host state while L2 runs but
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -793,14 +793,14 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+                         struct vmcb_save_area *from_save);
+ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+-int nested_svm_vmexit(struct vcpu_svm *svm);
++void nested_svm_vmexit(struct vcpu_svm *svm);
+-static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
++static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
+ {
+       svm->vmcb->control.exit_code    = exit_code;
+       svm->vmcb->control.exit_info_1  = 0;
+       svm->vmcb->control.exit_info_2  = 0;
+-      return nested_svm_vmexit(svm);
++      nested_svm_vmexit(svm);
+ }
+ int nested_svm_exit_handled(struct vcpu_svm *svm);
diff --git a/queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..389ee3f
--- /dev/null
@@ -0,0 +1,73 @@
+From 5c247d08bc81bbad4c662dcf5654137a2f8483ec Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 3 Feb 2026 20:10:10 +0000
+Subject: KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream.
+
+KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on
+nested #VMEXIT. This value is incorrect in some cases, causing L1 to run
+L2 with a corrupted CR2. This could lead to segfaults or data corruption
+if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use
+the correct value in vcpu->arch.cr2 instead.
+
+The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN
+of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are
+only out-of-sync in two cases: after save+restore, and after a #PF is
+injected into L2. In either case, if a #VMEXIT to L1 is synthesized
+before L2 runs, using the value in vmcb02 would be incorrect.
+
+After save+restore, the value of CR2 is restored by KVM_SET_SREGS into
+vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before
+that, it holds whatever was in vmcb02 before restore, which would be
+zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is
+synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12
+is the right thing to do.
+
+The #PF injection case is more nuanced.  Although the APM is a bit
+unclear about when CR2 is written during a #PF, the SDM is more clear:
+
+       Processors update CR2 whenever a page fault is detected. If a
+       second page fault occurs while an earlier page fault is being
+       delivered, the faulting linear address of the second fault will
+       overwrite the contents of CR2 (replacing the previous address).
+       These updates to CR2 occur even if the page fault results in a
+       double fault or occurs during the delivery of a double fault.
+
+KVM injecting the exception surely counts as the #PF being "detected".
+More importantly, when an exception is injected into L2 at the time of a
+synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly,
+such that an L1 hypervisor can re-inject the exception. If CR2 is not
+written at that point, the L1 hypervisor has no way of correctly
+re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after
+the #PF is injected into L2 but before it actually runs, using
+vcpu->arch.cr2 to update vmcb12 is also the right thing to do.
+
+Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for
+L2, only when it is injected. The distinction is important, because only
+injected (but not intercepted) exceptions are propagated to L1 through
+exit_int_info. It would be incorrect to update CR2 in vmcb12 for a
+pending #PF, as L1 would perceive an updated CR2 value with no #PF.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1166,7 +1166,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       vmcb12->save.efer   = svm->vcpu.arch.efer;
+       vmcb12->save.cr0    = kvm_read_cr0(vcpu);
+       vmcb12->save.cr3    = kvm_read_cr3(vcpu);
+-      vmcb12->save.cr2    = vmcb02->save.cr2;
++      vmcb12->save.cr2    = vcpu->arch.cr2;
+       vmcb12->save.cr4    = svm->vcpu.arch.cr4;
+       vmcb12->save.rflags = kvm_get_rflags(vcpu);
+       vmcb12->save.rip    = kvm_rip_read(vcpu);
diff --git a/queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch b/queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
new file mode 100644 (file)
index 0000000..05666e6
--- /dev/null
@@ -0,0 +1,133 @@
+From 3700f0788da6acf73b2df56690f4b201aa4aefd2 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:57 +0000
+Subject: KVM: SVM: Add missing save/restore handling of LBR MSRs
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 3700f0788da6acf73b2df56690f4b201aa4aefd2 upstream.
+
+MSR_IA32_DEBUGCTLMSR and LBR MSRs are currently not enumerated by
+KVM_GET_MSR_INDEX_LIST, and LBR MSRs cannot be set with KVM_SET_MSRS. So
+save/restore is completely broken.
+
+Fix it by adding the MSRs to msrs_to_save_base, and allowing writes to
+LBR MSRs from userspace only (as they are read-only MSRs) if LBR
+virtualization is enabled.  Additionally, to correctly restore L1's LBRs
+while L2 is running, make sure the LBRs are copied from the captured
+VMCB01 save area in svm_copy_vmrun_state().
+
+Note, for VMX, this also fixes a flaw where MSR_IA32_DEBUGCTLMSR isn't
+reported as an MSR to save/restore.
+
+Note #2, over-reporting MSR_IA32_LASTxxx on Intel is ok, as KVM already
+handles unsupported reads and writes thanks to commit b5e2fec0ebc3 ("KVM:
+Ignore DEBUGCTL MSRs with no effect") (kvm_do_msr_access() will morph the
+unsupported userspace write into a nop).
+
+Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization")
+Cc: stable@vger.kernel.org
+Reported-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-4-yosry@kernel.org
+[sean: guard with lbrv checks, massage changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    5 +++++
+ arch/x86/kvm/svm/svm.c    |   42 +++++++++++++++++++++++++++++++++++++-----
+ arch/x86/kvm/x86.c        |    3 +++
+ 3 files changed, 45 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1109,6 +1109,11 @@ void svm_copy_vmrun_state(struct vmcb_sa
+               to_save->isst_addr = from_save->isst_addr;
+               to_save->ssp = from_save->ssp;
+       }
++
++      if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) {
++              svm_copy_lbrs(to_save, from_save);
++              to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS;
++      }
+ }
+ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2788,19 +2788,19 @@ static int svm_get_msr(struct kvm_vcpu *
+               msr_info->data = svm->tsc_aux;
+               break;
+       case MSR_IA32_DEBUGCTLMSR:
+-              msr_info->data = svm->vmcb->save.dbgctl;
++              msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0;
+               break;
+       case MSR_IA32_LASTBRANCHFROMIP:
+-              msr_info->data = svm->vmcb->save.br_from;
++              msr_info->data = lbrv ? svm->vmcb->save.br_from : 0;
+               break;
+       case MSR_IA32_LASTBRANCHTOIP:
+-              msr_info->data = svm->vmcb->save.br_to;
++              msr_info->data = lbrv ? svm->vmcb->save.br_to : 0;
+               break;
+       case MSR_IA32_LASTINTFROMIP:
+-              msr_info->data = svm->vmcb->save.last_excp_from;
++              msr_info->data = lbrv ? svm->vmcb->save.last_excp_from : 0;
+               break;
+       case MSR_IA32_LASTINTTOIP:
+-              msr_info->data = svm->vmcb->save.last_excp_to;
++              msr_info->data = lbrv ? svm->vmcb->save.last_excp_to : 0;
+               break;
+       case MSR_VM_HSAVE_PA:
+               msr_info->data = svm->nested.hsave_msr;
+@@ -3075,6 +3075,38 @@ static int svm_set_msr(struct kvm_vcpu *
+               vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+               svm_update_lbrv(vcpu);
+               break;
++      case MSR_IA32_LASTBRANCHFROMIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.br_from = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTBRANCHTOIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.br_to = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTINTFROMIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.last_excp_from = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTINTTOIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.last_excp_to = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
+       case MSR_VM_HSAVE_PA:
+               /*
+                * Old kernels did not validate the value written to
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -351,6 +351,9 @@ static const u32 msrs_to_save_base[] = {
+       MSR_IA32_U_CET, MSR_IA32_S_CET,
+       MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+       MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
++      MSR_IA32_DEBUGCTLMSR,
++      MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
++      MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
+ };
+ static const u32 msrs_to_save_pmu[] = {
diff --git a/queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch b/queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
new file mode 100644 (file)
index 0000000..984bf5c
--- /dev/null
@@ -0,0 +1,42 @@
+From d5bde6113aed8315a2bfe708730b721be9c2f48b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Feb 2026 15:09:51 -0800
+Subject: KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream.
+
+When reacting to an intercept update, explicitly mark vmcb01's intercepts
+dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit()
+isn't guaranteed to mark VMCB_INTERCEPTS as dirty.  I.e. if L2 is active,
+KVM will modify the intercepts for L1, but might not mark them as dirty
+before the next VMRUN of L1.
+
+Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -128,11 +128,13 @@ void recalc_intercepts(struct vcpu_svm *
+       struct vmcb_ctrl_area_cached *g;
+       unsigned int i;
+-      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++      vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
+       if (!is_guest_mode(&svm->vcpu))
+               return;
++      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++
+       c = &svm->vmcb->control;
+       h = &svm->vmcb01.ptr->control;
+       g = &svm->nested.ctl;
diff --git a/queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch b/queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
new file mode 100644 (file)
index 0000000..0a8c0d7
--- /dev/null
@@ -0,0 +1,36 @@
+From d99df02ff427f461102230f9c5b90a6c64ee8e23 Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Sat, 28 Feb 2026 03:33:26 +0000
+Subject: KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream.
+
+INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to
+properly inject #UD when EFER.SVME=0.
+
+Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com
+[sean: tag for stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2366,6 +2366,9 @@ static int invlpga_interception(struct k
+       gva_t gva = kvm_rax_read(vcpu);
+       u32 asid = kvm_rcx_read(vcpu);
++      if (nested_svm_check_permissions(vcpu))
++              return 1;
++
+       /* FIXME: Handle an address size prefix. */
+       if (!is_long_mode(vcpu))
+               gva = (u32)gva;
diff --git a/queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch b/queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
new file mode 100644 (file)
index 0000000..723873f
--- /dev/null
@@ -0,0 +1,94 @@
+From 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:56 +0000
+Subject: KVM: SVM: Switch svm_copy_lbrs() to a macro
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 upstream.
+
+In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
+without a containing 'struct vmcb', and later even 'struct
+vmcb_save_area_cached', make it a macro.
+
+Macros are generally not preferred compared to functions, mainly due to
+type-safety. However, in this case it seems like having a simple macro
+copying a few fields is better than copy-pasting the same 5 lines of
+code in different places.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    8 ++++----
+ arch/x86/kvm/svm/svm.c    |    9 ---------
+ arch/x86/kvm/svm/svm.h    |   10 +++++++++-
+ 3 files changed, 13 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -721,10 +721,10 @@ static void nested_vmcb02_prepare_save(s
+                * Reserved bits of DEBUGCTL are ignored.  Be consistent with
+                * svm_set_msr's definition of reserved bits.
+                */
+-              svm_copy_lbrs(vmcb02, vmcb12);
++              svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+               vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+       } else {
+-              svm_copy_lbrs(vmcb02, vmcb01);
++              svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+       }
+       vmcb_mark_dirty(vmcb02, VMCB_LBR);
+       svm_update_lbrv(&svm->vcpu);
+@@ -1243,9 +1243,9 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+-              svm_copy_lbrs(vmcb12, vmcb02);
++              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+       } else {
+-              svm_copy_lbrs(vmcb01, vmcb02);
++              svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+               vmcb_mark_dirty(vmcb01, VMCB_LBR);
+       }
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -841,15 +841,6 @@ static void svm_recalc_msr_intercepts(st
+        */
+ }
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+-{
+-      to_vmcb->save.dbgctl            = from_vmcb->save.dbgctl;
+-      to_vmcb->save.br_from           = from_vmcb->save.br_from;
+-      to_vmcb->save.br_to             = from_vmcb->save.br_to;
+-      to_vmcb->save.last_excp_from    = from_vmcb->save.last_excp_from;
+-      to_vmcb->save.last_excp_to      = from_vmcb->save.last_excp_to;
+-}
+-
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+       to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -713,8 +713,16 @@ static inline void *svm_vcpu_alloc_msrpm
+       return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
+ }
++#define svm_copy_lbrs(to, from)                                       \
++do {                                                          \
++      (to)->dbgctl            = (from)->dbgctl;               \
++      (to)->br_from           = (from)->br_from;              \
++      (to)->br_to             = (from)->br_to;                \
++      (to)->last_excp_from    = (from)->last_excp_from;       \
++      (to)->last_excp_to      = (from)->last_excp_to;         \
++} while (0)
++
+ void svm_vcpu_free_msrpm(void *msrpm);
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+ void svm_enable_lbrv(struct kvm_vcpu *vcpu);
+ void svm_update_lbrv(struct kvm_vcpu *vcpu);
diff --git a/queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch b/queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
new file mode 100644 (file)
index 0000000..8a4c784
--- /dev/null
@@ -0,0 +1,175 @@
+From d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 17 Feb 2026 16:54:38 -0800
+Subject: KVM: x86: Defer non-architectural deliver of exception payload to userspace read
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 upstream.
+
+When attempting to play nice with userspace that hasn't enabled
+KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the
+payload until userspace actually reads relevant vCPU state, and more
+importantly, force delivery of the payload in *all* paths where userspace
+saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS.
+
+Ignoring userspace save/restore for the moment, delivering the payload
+before the exception is injected is wrong regardless of whether L1 or L2
+is running.  To make matters even more confusing, the flaw *currently*
+being papered over by the !is_guest_mode() check isn't even the same bug
+that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF
+delivery") was trying to avoid.
+
+At the time of commit da998b46d244, KVM didn't correctly handle exception
+intercepts, as KVM would wait until VM-Entry into L2 was imminent to check
+if the queued exception should morph to a nested VM-Exit.  I.e. KVM would
+deliver the payload to L2 and then synthesize a VM-Exit into L1.  But the
+payload was only the most blatant issue, e.g. waiting to check exception
+intercepts would also lead to KVM incorrectly escalating a
+should-be-intercepted #PF into a #DF.
+
+That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86:
+Morph pending exceptions to pending VM-Exits at queue time"), but in the
+interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on
+KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on
+the !is_guest_mode() check.
+
+While not recorded in the changelog, the motivation for deferring the
+!exception_payload_enabled delivery was to fix a flaw where a synthesized
+MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6.
+On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which
+is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the
+payload from the queue/pending exception.  I.e. prematurely delivering the
+payload would cause the pending #DB to not be recorded in the VMCS, and of
+course, clobber L2's DR6 as seen by L1.
+
+Jumping back to save+restore, the quirked behavior of forcing delivery of
+the payload only works if userspace does KVM_GET_VCPU_EVENTS *before*
+CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS.
+E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the
+CR2 saved by userspace won't contain the payload for the exception saved by
+KVM_GET_VCPU_EVENTS.
+
+Deliberately deliver the payload in the store_regs() path, as it's the
+least awful option even though userspace may not be doing save+restore.
+Because if userspace _is_ doing save+restore, it could elide KVM_GET_SREGS
+knowing that SREGS were already saved when the vCPU exited.
+
+Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com
+Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Tested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   62 +++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -864,9 +864,6 @@ static void kvm_multiple_exception(struc
+               vcpu->arch.exception.error_code = error_code;
+               vcpu->arch.exception.has_payload = has_payload;
+               vcpu->arch.exception.payload = payload;
+-              if (!is_guest_mode(vcpu))
+-                      kvm_deliver_exception_payload(vcpu,
+-                                                    &vcpu->arch.exception);
+               return;
+       }
+@@ -5531,18 +5528,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(st
+       return 0;
+ }
+-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+-                                             struct kvm_vcpu_events *events)
++static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_queued_exception *ex;
+-
+-      process_nmi(vcpu);
+-
+-#ifdef CONFIG_KVM_SMM
+-      if (kvm_check_request(KVM_REQ_SMI, vcpu))
+-              process_smi(vcpu);
+-#endif
+-
+       /*
+        * KVM's ABI only allows for one exception to be migrated.  Luckily,
+        * the only time there can be two queued exceptions is if there's a
+@@ -5553,21 +5540,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_
+       if (vcpu->arch.exception_vmexit.pending &&
+           !vcpu->arch.exception.pending &&
+           !vcpu->arch.exception.injected)
+-              ex = &vcpu->arch.exception_vmexit;
+-      else
+-              ex = &vcpu->arch.exception;
++              return &vcpu->arch.exception_vmexit;
++
++      return &vcpu->arch.exception;
++}
++
++static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu)
++{
++      struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
+       /*
+-       * In guest mode, payload delivery should be deferred if the exception
+-       * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
+-       * intercepts #PF, ditto for DR6 and #DBs.  If the per-VM capability,
+-       * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
+-       * propagate the payload and so it cannot be safely deferred.  Deliver
+-       * the payload if the capability hasn't been requested.
++       * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver
++       * the pending exception payload when userspace saves *any* vCPU state
++       * that interacts with exception payloads to avoid breaking userspace.
++       *
++       * Architecturally, KVM must not deliver an exception payload until the
++       * exception is actually injected, e.g. to avoid losing pending #DB
++       * information (which VMX tracks in the VMCS), and to avoid clobbering
++       * state if the exception is never injected for whatever reason.  But
++       * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or
++       * may not propagate the payload across save+restore, and so KVM can't
++       * safely defer delivery of the payload.
+        */
+       if (!vcpu->kvm->arch.exception_payload_enabled &&
+           ex->pending && ex->has_payload)
+               kvm_deliver_exception_payload(vcpu, ex);
++}
++
++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
++                                             struct kvm_vcpu_events *events)
++{
++      struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
++
++      process_nmi(vcpu);
++
++#ifdef CONFIG_KVM_SMM
++      if (kvm_check_request(KVM_REQ_SMI, vcpu))
++              process_smi(vcpu);
++#endif
++
++      kvm_handle_exception_payload_quirk(vcpu);
+       memset(events, 0, sizeof(*events));
+@@ -5746,6 +5758,8 @@ static int kvm_vcpu_ioctl_x86_get_debugr
+           vcpu->arch.guest_state_protected)
+               return -EINVAL;
++      kvm_handle_exception_payload_quirk(vcpu);
++
+       memset(dbgregs, 0, sizeof(*dbgregs));
+       BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+@@ -12148,6 +12162,8 @@ static void __get_sregs_common(struct kv
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
++      kvm_handle_exception_payload_quirk(vcpu);
++
+       kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+       kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+       kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
diff --git a/queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch b/queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
new file mode 100644 (file)
index 0000000..870ca11
--- /dev/null
@@ -0,0 +1,33 @@
+From da773ea3f59032f659bfc4c450ca86e384786168 Mon Sep 17 00:00:00 2001
+From: Tao Cui <cuitao@kylinos.cn>
+Date: Thu, 9 Apr 2026 18:56:36 +0800
+Subject: LoongArch: KVM: Use CSR_CRMD_PLV in kvm_arch_vcpu_in_kernel()
+
+From: Tao Cui <cuitao@kylinos.cn>
+
+commit da773ea3f59032f659bfc4c450ca86e384786168 upstream.
+
+The function reads LOONGARCH_CSR_CRMD but uses CSR_PRMD_PPLV to
+extract the privilege level. While both masks have the same value
+(0x3), CSR_CRMD_PLV is the semantically correct constant for CRMD.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Tao Cui <cuitao@kylinos.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vcpu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/vcpu.c
++++ b/arch/loongarch/kvm/vcpu.c
+@@ -402,7 +402,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_
+       val = gcsr_read(LOONGARCH_CSR_CRMD);
+       preempt_enable();
+-      return (val & CSR_PRMD_PPLV) == PLV_KERN;
++      return (val & CSR_CRMD_PLV) == PLV_KERN;
+ }
+ #ifdef CONFIG_GUEST_PERF_EVENTS
diff --git a/queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch b/queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch
new file mode 100644 (file)
index 0000000..b35b6a4
--- /dev/null
@@ -0,0 +1,43 @@
+From 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 11 Apr 2026 14:36:36 -0700
+Subject: mm/damon/core: disallow non-power of two min_region_sz on damon_start()
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a upstream.
+
+Commit d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") introduced
+a bug that allows unaligned DAMON region address ranges.  Commit
+c80f46ac228b ("mm/damon/core: disallow non-power of two min_region_sz")
+fixed it, but only for damon_commit_ctx() use case.  Still, DAMON sysfs
+interface can emit non-power of two min_region_sz via damon_start().  Fix
+the path by adding the is_power_of_2() check on damon_start().
+
+The issue was discovered by sashiko [1].
+
+Link: https://lore.kernel.org/20260411213638.77768-1-sj@kernel.org
+Link: https://lore.kernel.org/20260403155530.64647-1-sj@kernel.org [1]
+Fixes: d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.18.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -1368,6 +1368,11 @@ int damon_start(struct damon_ctx **ctxs,
+       int i;
+       int err = 0;
++      for (i = 0; i < nr_ctxs; i++) {
++              if (!is_power_of_2(ctxs[i]->min_region_sz))
++                      return -EINVAL;
++      }
++
+       mutex_lock(&damon_lock);
+       if ((exclusive && nr_running_ctxs) ||
+                       (!exclusive && running_exclusive_ctxs)) {
diff --git a/queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch b/queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch
new file mode 100644 (file)
index 0000000..6c9a7cb
--- /dev/null
@@ -0,0 +1,88 @@
+From 8bbde987c2b84f80da0853f739f0a920386f8b99 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Mon, 6 Apr 2026 17:31:52 -0700
+Subject: mm/damon/core: disallow time-quota setting zero esz
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 8bbde987c2b84f80da0853f739f0a920386f8b99 upstream.
+
+When the throughput of a DAMOS scheme is very slow, DAMOS time quota can
+make the effective size quota smaller than damon_ctx->min_region_sz.  In
+the case, damos_apply_scheme() will skip applying the action, because the
+action is tried at region level, which requires >=min_region_sz size.
+That is, the quota is effectively exceeded for the quota charge window.
+
+Because no action will be applied, the total_charged_sz and
+total_charged_ns are also not updated.  damos_set_effective_quota() will
+try to update the effective size quota before starting the next charge
+window.  However, because the total_charged_sz and total_charged_ns have
+not updated, the throughput and effective size quota are also not changed.
+Since effective size quota can only be decreased, other effective size
+quota update factors including DAMOS quota goals and size quota cannot
+make any change, either.
+
+As a result, the scheme is unexpectedly deactivated until the user notices
+and mitigates the situation.  The users can mitigate this situation by
+changing the time quota online or re-install the scheme.  While the
+mitigation is somewhat straightforward, finding the situation would be
+challenging, because DAMON is not providing good observabilities for that.
+Even if such observability is provided, doing the additional monitoring
+and the mitigation is somewhat cumbersome and not aligned to the intention
+of the time quota.  The time quota was intended to help reduce the user's
+administration overhead.
+
+Fix the problem by setting time quota-modified effective size quota be at
+least min_region_sz always.
+
+The issue was discovered [1] by sashiko.
+
+Link: https://lore.kernel.org/20260407003153.79589-1-sj@kernel.org
+Link: https://lore.kernel.org/20260405192504.110014-1-sj@kernel.org [1]
+Fixes: 1cd243030059 ("mm/damon/schemes: implement time quota")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 5.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2225,7 +2225,8 @@ static unsigned long damos_quota_score(s
+ /*
+  * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
+  */
+-static void damos_set_effective_quota(struct damos_quota *quota)
++static void damos_set_effective_quota(struct damos_quota *quota,
++              struct damon_ctx *ctx)
+ {
+       unsigned long throughput;
+       unsigned long esz = ULONG_MAX;
+@@ -2251,6 +2252,7 @@ static void damos_set_effective_quota(st
+               else
+                       throughput = PAGE_SIZE * 1024;
+               esz = min(throughput * quota->ms, esz);
++              esz = max(ctx->min_region_sz, esz);
+       }
+       if (quota->sz && quota->sz < esz)
+@@ -2287,7 +2289,7 @@ static void damos_adjust_quota(struct da
+       /* First charge window */
+       if (!quota->total_charged_sz && !quota->charged_from) {
+               quota->charged_from = jiffies;
+-              damos_set_effective_quota(quota);
++              damos_set_effective_quota(quota, c);
+       }
+       /* New charge window starts */
+@@ -2301,7 +2303,7 @@ static void damos_adjust_quota(struct da
+               quota->charged_sz = 0;
+               if (trace_damos_esz_enabled())
+                       cached_esz = quota->esz;
+-              damos_set_effective_quota(quota);
++              damos_set_effective_quota(quota, c);
+               if (trace_damos_esz_enabled() && quota->esz != cached_esz)
+                       damos_trace_esz(c, s, quota);
+       }
diff --git a/queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch b/queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
new file mode 100644 (file)
index 0000000..ea14965
--- /dev/null
@@ -0,0 +1,56 @@
+From 049a57421dd67a28c45ae7e92c36df758033e5fa Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sun, 29 Mar 2026 08:23:05 -0700
+Subject: mm/damon/core: use time_in_range_open() for damos quota window start
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 049a57421dd67a28c45ae7e92c36df758033e5fa upstream.
+
+damos_adjust_quota() uses time_after_eq() to show if it is time to start a
+new quota charge window, comparing the current jiffies and the scheduled
+next charge window start time.  If it is, the next charge window start
+time is updated and the new charge window starts.
+
+The time check and next window start time update is skipped while the
+scheme is deactivated by the watermarks.  Let's suppose the deactivation
+is kept more than LONG_MAX jiffies (assuming CONFIG_HZ of 250, more than
+99 days in 32 bit systems and more than one billion years in 64 bit
+systems), resulting in having the jiffies larger than the next charge
+window start time + LONG_MAX.  Then, the time_after_eq() call can return
+false until another LONG_MAX jiffies are passed.
+
+This means the scheme can continue working after being reactivated by the
+watermarks.  But, soon, the quota will be exceeded and the scheme will
+again effectively stop working until the next charge window starts.
+Because the current charge window is extended to up to LONG_MAX jiffies,
+however, it will look like it stopped unexpectedly and indefinitely, from
+the user's perspective.
+
+Fix this by using !time_in_range_open() instead.
+
+The issue was discovered [1] by sashiko.
+
+Link: https://lore.kernel.org/20260329152306.45796-1-sj@kernel.org
+Link: https://lore.kernel.org/20260324040722.57944-1-sj@kernel.org [1]
+Fixes: ee801b7dd782 ("mm/damon/schemes: activate schemes based on a watermarks mechanism")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 5.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2291,7 +2291,8 @@ static void damos_adjust_quota(struct da
+       }
+       /* New charge window starts */
+-      if (time_after_eq(jiffies, quota->charged_from +
++      if (!time_in_range_open(jiffies, quota->charged_from,
++                              quota->charged_from +
+                               msecs_to_jiffies(quota->reset_interval))) {
+               if (quota->esz && quota->charged_sz >= quota->esz)
+                       s->stat.qt_exceeds++;
diff --git a/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
new file mode 100644 (file)
index 0000000..0a61afe
--- /dev/null
@@ -0,0 +1,79 @@
+From 40250b2dded0604a112be605f3828700d80ad7c2 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 28 Mar 2026 21:38:59 -0700
+Subject: mm/damon/core: validate damos_quota_goal->nid for node_mem_{used,free}_bp
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 40250b2dded0604a112be605f3828700d80ad7c2 upstream.
+
+Patch series "mm/damon/core: validate damos_quota_goal->nid".
+
+node_mem[cg]_{used,free}_bp DAMOS quota goals receive the node id.  The
+node id is used for si_meminfo_node() and NODE_DATA() without proper
+validation.  As a result, privileged users can trigger an out of bounds
+memory access using DAMON_SYSFS.  Fix the issues.
+
+The issue was originally reported [1] with a fix by another author.  The
+original author announced [2] that they will stop working including the
+fix that was still in the review stage.  Hence I'm restarting this.
+
+
+This patch (of 2):
+
+Users can set damos_quota_goal->nid with arbitrary value for
+node_mem_{used,free}_bp.  But DAMON core is using those for
+si_meminfo_node() without the validation of the value.  This can result in
+out of bounds memory access.  The issue can actually triggered using DAMON
+user-space tool (damo), like below.
+
+    $ sudo ./damo start --damos_action stat \
+       --damos_quota_goal node_mem_used_bp 50% -1 \
+       --damos_quota_interval 1s
+    $ sudo dmesg
+    [...]
+    [   65.565986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098
+
+Fix this issue by adding the validation of the given node.  If an invalid
+node id is given, it returns 0% for used memory ratio, and 100% for free
+memory ratio.
+
+Link: https://lore.kernel.org/20260329043902.46163-2-sj@kernel.org
+Link: https://lore.kernel.org/20260325073034.140353-1-objecting@objecting.org [1]
+Link: https://lore.kernel.org/20260327040924.68553-1-sj@kernel.org [2]
+Fixes: 0e1c773b501f ("mm/damon/core: introduce damos quota goal metrics for memory node utilization")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2078,12 +2078,24 @@ static inline u64 damos_get_some_mem_psi
+ #endif        /* CONFIG_PSI */
+ #ifdef CONFIG_NUMA
++static bool invalid_mem_node(int nid)
++{
++      return nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY);
++}
++
+ static __kernel_ulong_t damos_get_node_mem_bp(
+               struct damos_quota_goal *goal)
+ {
+       struct sysinfo i;
+       __kernel_ulong_t numerator;
++      if (invalid_mem_node(goal->nid)) {
++              if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
++                      return 0;
++              else    /* DAMOS_QUOTA_NODE_MEM_FREE_BP */
++                      return 10000;
++      }
++
+       si_meminfo_node(&i, goal->nid);
+       if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
+               numerator = i.totalram - i.freeram;
diff --git a/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch
new file mode 100644 (file)
index 0000000..e209545
--- /dev/null
@@ -0,0 +1,52 @@
+From a34dac6482e53e2c76944f25b1489b9b7da3a6e6 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 28 Mar 2026 21:39:00 -0700
+Subject: mm/damon/core: validate damos_quota_goal->nid for node_memcg_{used,free}_bp
+
+From: SeongJae Park <sj@kernel.org>
+
+commit a34dac6482e53e2c76944f25b1489b9b7da3a6e6 upstream.
+
+Users can set damos_quota_goal->nid with arbitrary value for
+node_memcg_{used,free}_bp.  But DAMON core is using those for NODE-DATA()
+without a validation of the value.  This can result in out of bounds
+memory access.  The issue can actually triggered using DAMON user-space
+tool (damo), like below.
+
+    $ sudo mkdir /sys/fs/cgroup/foo
+    $ sudo ./damo start --damos_action stat --damos_quota_interval 1s \
+            --damos_quota_goal node_memcg_used_bp 50% -1 /foo
+    $ sudo dmesg
+    [...]
+    [  524.181426] Unable to handle kernel paging request at virtual address 0000000000002c00
+
+Fix this issue by adding the validation of the given node id.  If an
+invalid node id is given, it returns 0% for used memory ratio, and 100%
+for free memory ratio.
+
+Link: https://lore.kernel.org/20260329043902.46163-3-sj@kernel.org
+Fixes: b74a120bcf50 ("mm/damon/core: implement DAMOS_QUOTA_NODE_MEMCG_USED_BP")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.19.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2112,6 +2112,13 @@ static unsigned long damos_get_node_memc
+       unsigned long used_pages, numerator;
+       struct sysinfo i;
++      if (invalid_mem_node(goal->nid)) {
++              if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
++                      return 0;
++              else    /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */
++                      return 10000;
++      }
++
+       memcg = mem_cgroup_get_from_id(goal->memcg_id);
+       if (!memcg) {
+               if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
diff --git a/queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch b/queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
new file mode 100644 (file)
index 0000000..d1b2e21
--- /dev/null
@@ -0,0 +1,41 @@
+From e04ed278d25bf15769800bf6e35c6737f137186f Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Tue, 31 Mar 2026 18:15:53 +0800
+Subject: mm/damon/stat: fix memory leak on damon_start() failure in damon_stat_start()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit e04ed278d25bf15769800bf6e35c6737f137186f upstream.
+
+Destroy the DAMON context and reset the global pointer when damon_start()
+fails.  Otherwise, the context allocated by damon_stat_build_ctx() is
+leaked, and the stale damon_stat_context pointer will be overwritten on
+the next enable attempt, making the old allocation permanently
+unreachable.
+
+Link: https://lore.kernel.org/20260331101553.88422-1-liu.yun@linux.dev
+Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.17.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/stat.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/stat.c
++++ b/mm/damon/stat.c
+@@ -255,8 +255,11 @@ static int damon_stat_start(void)
+       if (!damon_stat_context)
+               return -ENOMEM;
+       err = damon_start(&damon_stat_context, 1, true);
+-      if (err)
++      if (err) {
++              damon_destroy_ctx(damon_stat_context);
++              damon_stat_context = NULL;
+               return err;
++      }
+       damon_stat_last_refresh_jiffies = jiffies;
+       call_control.data = damon_stat_context;
diff --git a/queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch b/queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch
new file mode 100644 (file)
index 0000000..87443a5
--- /dev/null
@@ -0,0 +1,57 @@
+From 3538f90ab89aaf302782b4b073a0aae66904cd67 Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <duanchenghao@kylinos.cn>
+Date: Thu, 26 Mar 2026 16:47:25 +0800
+Subject: mm/memfd_luo: fix physical address conversion in put_folios cleanup
+
+From: Chenghao Duan <duanchenghao@kylinos.cn>
+
+commit 3538f90ab89aaf302782b4b073a0aae66904cd67 upstream.
+
+In memfd_luo_retrieve_folios()'s put_folios cleanup path:
+
+1. kho_restore_folio() expects a phys_addr_t (physical address) but
+   receives a raw PFN (pfolio->pfn). This causes kho_restore_page() to
+   check the wrong physical address (pfn << PAGE_SHIFT instead of the
+   actual physical address).
+
+2. This loop lacks the !pfolio->pfn check that exists in the main
+   retrieval loop and memfd_luo_discard_folios(), which could
+   incorrectly process sparse file holes where pfn=0.
+
+Fix by converting PFN to physical address with PFN_PHYS() and adding
+the !pfolio->pfn check, matching the pattern used elsewhere in this file.
+
+This issue was identified by the AI review.
+https://sashiko.dev/#/patchset/20260323110747.193569-1-duanchenghao@kylinos.cn
+
+Link: https://lore.kernel.org/20260326084727.118437-6-duanchenghao@kylinos.cn
+Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd")
+Signed-off-by: Chenghao Duan <duanchenghao@kylinos.cn>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Haoran Jiang <jianghaoran@kylinos.cn>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memfd_luo.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/memfd_luo.c
++++ b/mm/memfd_luo.c
+@@ -466,8 +466,13 @@ put_folios:
+        */
+       for (long j = i + 1; j < nr_folios; j++) {
+               const struct memfd_luo_folio_ser *pfolio = &folios_ser[j];
++              phys_addr_t phys;
+-              folio = kho_restore_folio(pfolio->pfn);
++              if (!pfolio->pfn)
++                      continue;
++
++              phys = PFN_PHYS(pfolio->pfn);
++              folio = kho_restore_folio(phys);
+               if (folio)
+                       folio_put(folio);
+       }
diff --git a/queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch b/queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
new file mode 100644 (file)
index 0000000..20922f1
--- /dev/null
@@ -0,0 +1,76 @@
+From 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Wed, 1 Apr 2026 08:57:02 +0800
+Subject: mm/mempolicy: fix memory leaks in weighted_interleave_auto_store()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 upstream.
+
+weighted_interleave_auto_store() fetches old_wi_state inside the if
+(!input) block only.  This causes two memory leaks:
+
+1. When a user writes "false" and the current mode is already manual,
+   the function returns early without freeing the freshly allocated
+   new_wi_state.
+
+2. When a user writes "true", old_wi_state stays NULL because the
+   fetch is skipped entirely. The old state is then overwritten by
+   rcu_assign_pointer() but never freed, since the cleanup path is
+   gated on old_wi_state being non-NULL. A user can trigger this
+   repeatedly by writing "1" in a loop.
+
+Fix both leaks by moving the old_wi_state fetch before the input check,
+making it unconditional.  This also allows a unified early return for both
+"true" and "false" when the requested mode matches the current mode.
+
+Link: https://lore.kernel.org/20260401005702.7096-1-liu.yun@linux.dev
+Link: https://sashiko.dev/#/patchset/20260331100740.84906-1-liu.yun@linux.dev
+Fixes: e341f9c3c841 ("mm/mempolicy: Weighted Interleave Auto-tuning")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
+Reviewed-by: Donet Tom <donettom@linux.ibm.com>
+Cc: Gregory Price <gourry@gourry.net>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Byungchul Park <byungchul@sk.com>
+Cc: David Hildenbrand <david@kernel.org>
+Cc: <stable@vger.kernel.org> # v6.16+
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c |   23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -3706,18 +3706,19 @@ static ssize_t weighted_interleave_auto_
+               new_wi_state->iw_table[i] = 1;
+       mutex_lock(&wi_state_lock);
+-      if (!input) {
+-              old_wi_state = rcu_dereference_protected(wi_state,
+-                                      lockdep_is_held(&wi_state_lock));
+-              if (!old_wi_state)
+-                      goto update_wi_state;
+-              if (input == old_wi_state->mode_auto) {
+-                      mutex_unlock(&wi_state_lock);
+-                      return count;
+-              }
++      old_wi_state = rcu_dereference_protected(wi_state,
++                              lockdep_is_held(&wi_state_lock));
++
++      if (old_wi_state && input == old_wi_state->mode_auto) {
++              mutex_unlock(&wi_state_lock);
++              kfree(new_wi_state);
++              return count;
++      }
+-              memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
+-                                             nr_node_ids * sizeof(u8));
++      if (!input) {
++              if (old_wi_state)
++                      memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
++                                                     nr_node_ids * sizeof(u8));
+               goto update_wi_state;
+       }
diff --git a/queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch b/queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
new file mode 100644 (file)
index 0000000..ba17894
--- /dev/null
@@ -0,0 +1,42 @@
+From ec05f51f1e65bce95528543eb73fda56fd201d94 Mon Sep 17 00:00:00 2001
+From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Date: Mon, 13 Apr 2026 21:26:46 +0200
+Subject: mm/vmalloc: take vmap_purge_lock in shrinker
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+commit ec05f51f1e65bce95528543eb73fda56fd201d94 upstream.
+
+decay_va_pool_node() can be invoked concurrently from two paths:
+__purge_vmap_area_lazy() when pools are being purged, and the shrinker via
+vmap_node_shrink_scan().
+
+However, decay_va_pool_node() is not safe to run concurrently, and the
+shrinker path currently lacks serialization, leading to races and possible
+leaks.
+
+Protect decay_va_pool_node() by taking vmap_purge_lock in the shrinker
+path to ensure serialization with purge users.
+
+Link: https://lore.kernel.org/20260413192646.14683-1-urezki@gmail.com
+Fixes: 7679ba6b36db ("mm: vmalloc: add a shrinker to drain vmap pools")
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Reviewed-by: Baoquan He <baoquan.he@linux.dev>
+Cc: chenyichong <chenyichong@uniontech.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -5416,6 +5416,7 @@ vmap_node_shrink_scan(struct shrinker *s
+ {
+       struct vmap_node *vn;
++      guard(mutex)(&vmap_purge_lock);
+       for_each_vmap_node(vn)
+               decay_va_pool_node(vn, true);
diff --git a/queue-7.0/mmc-block-use-single-block-write-in-retry.patch b/queue-7.0/mmc-block-use-single-block-write-in-retry.patch
new file mode 100644 (file)
index 0000000..8fb107a
--- /dev/null
@@ -0,0 +1,92 @@
+From c7c6d4f5103864f73ee3a78bfd6da241f84197dd Mon Sep 17 00:00:00 2001
+From: Bin Liu <b-liu@ti.com>
+Date: Wed, 25 Mar 2026 08:49:47 -0500
+Subject: mmc: block: use single block write in retry
+
+From: Bin Liu <b-liu@ti.com>
+
+commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream.
+
+Due to errata i2493[0], multi-block write would still fail in retries.
+
+With i2493, the MMC interface has the potential of write failures when
+issuing multi-block writes operating in HS200 mode with excessive IO
+supply noise.
+
+While the errata provides guidance in hardware design and layout to
+minimize the IO supply noise, in theory the write failure cannot be
+resolved in hardware. The software solution to ensure the data integrity
+is to add minimum 5us delay between block writes. Single-block write is
+the practical way to introduce the delay.
+
+This patch reuses recovery_mode flag, and switches to single-block
+write in retry when multi-block write fails. It covers both CQE and
+non-CQE cases.
+
+[0] https://www.ti.com/lit/pdf/sprz582
+Cc: stable@vger.kernel.org
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Liu <b-liu@ti.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/block.c |   12 ++++++++++--
+ drivers/mmc/core/queue.h |    3 +++
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -1401,6 +1401,9 @@ static void mmc_blk_data_prep(struct mmc
+                   rq_data_dir(req) == WRITE &&
+                   (md->flags & MMC_BLK_REL_WR);
++      if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK)
++              recovery_mode = 1;
++
+       memset(brq, 0, sizeof(struct mmc_blk_request));
+       mmc_crypto_prepare_req(mqrq);
+@@ -1540,10 +1543,13 @@ static void mmc_blk_cqe_complete_rq(stru
+               err = 0;
+       if (err) {
+-              if (mqrq->retries++ < MMC_CQE_RETRIES)
++              if (mqrq->retries++ < MMC_CQE_RETRIES) {
++                      if (rq_data_dir(req) == WRITE)
++                              mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+                       blk_mq_requeue_request(req, true);
+-              else
++              } else {
+                       blk_mq_end_request(req, BLK_STS_IOERR);
++              }
+       } else if (mrq->data) {
+               if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
+                       blk_mq_requeue_request(req, true);
+@@ -2085,6 +2091,8 @@ static void mmc_blk_mq_complete_rq(struc
+       } else if (!blk_rq_bytes(req)) {
+               __blk_mq_end_request(req, BLK_STS_IOERR);
+       } else if (mqrq->retries++ < MMC_MAX_RETRIES) {
++              if (rq_data_dir(req) == WRITE)
++                      mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+               blk_mq_requeue_request(req, true);
+       } else {
+               if (mmc_card_removed(mq->card))
+--- a/drivers/mmc/core/queue.h
++++ b/drivers/mmc/core/queue.h
+@@ -61,6 +61,8 @@ enum mmc_drv_op {
+       MMC_DRV_OP_GET_EXT_CSD,
+ };
++#define       MQRQ_XFER_SINGLE_BLOCK          BIT(0)
++
+ struct mmc_queue_req {
+       struct mmc_blk_request  brq;
+       struct scatterlist      *sg;
+@@ -69,6 +71,7 @@ struct mmc_queue_req {
+       void                    *drv_op_data;
+       unsigned int            ioc_count;
+       int                     retries;
++      u32                     flags;
+ };
+ struct mmc_queue {
diff --git a/queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch b/queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
new file mode 100644 (file)
index 0000000..853763f
--- /dev/null
@@ -0,0 +1,81 @@
+From 6546a49bbe656981d99a389195560999058c89c4 Mon Sep 17 00:00:00 2001
+From: Shawn Lin <shawn.lin@rock-chips.com>
+Date: Wed, 8 Apr 2026 15:18:49 +0800
+Subject: mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration
+
+From: Shawn Lin <shawn.lin@rock-chips.com>
+
+commit 6546a49bbe656981d99a389195560999058c89c4 upstream.
+
+According to the ASIC design recommendations, the clock must be
+disabled before operating the DLL to prevent glitches that could
+affect the internal digital logic. In extreme cases, failing to
+do so may cause the controller to malfunction completely.
+
+Adds a step to disable the clock before DLL configuration and
+re-enables it at the end.
+
+Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-of-dwcmshc.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
+@@ -738,12 +738,15 @@ static void dwcmshc_rk3568_set_clock(str
+       extra |= BIT(4);
+       sdhci_writel(host, extra, reg);
++      /* Disable clock while config DLL */
++      sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
++
+       if (clock <= 52000000) {
+               if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+                   host->mmc->ios.timing == MMC_TIMING_MMC_HS400) {
+                       dev_err(mmc_dev(host->mmc),
+                               "Can't reduce the clock below 52MHz in HS200/HS400 mode");
+-                      return;
++                      goto enable_clk;
+               }
+               /*
+@@ -763,7 +766,7 @@ static void dwcmshc_rk3568_set_clock(str
+                       DLL_STRBIN_DELAY_NUM_SEL |
+                       DLL_STRBIN_DELAY_NUM_DEFAULT << DLL_STRBIN_DELAY_NUM_OFFSET;
+               sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
+-              return;
++              goto enable_clk;
+       }
+       /* Reset DLL */
+@@ -790,7 +793,7 @@ static void dwcmshc_rk3568_set_clock(str
+                                500 * USEC_PER_MSEC);
+       if (err) {
+               dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n");
+-              return;
++              goto enable_clk;
+       }
+       extra = 0x1 << 16 | /* tune clock stop en */
+@@ -823,6 +826,16 @@ static void dwcmshc_rk3568_set_clock(str
+               DLL_STRBIN_TAPNUM_DEFAULT |
+               DLL_STRBIN_TAPNUM_FROM_SW;
+       sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
++
++enable_clk:
++      /*
++       * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional
++       * on Rockchip's SDHCI implementation. Instead, the clock frequency is fully
++       * controlled via external clk provider by calling clk_set_rate(). Consequently,
++       * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock,
++       * which matches the hardware's actual behavior.
++       */
++      sdhci_enable_clk(host, 0);
+ }
+ static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask)
diff --git a/queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch b/queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
new file mode 100644 (file)
index 0000000..8c5c478
--- /dev/null
@@ -0,0 +1,59 @@
+From 3962c24f2d14e8a7f8a23f56b7ce320523947342 Mon Sep 17 00:00:00 2001
+From: "Viorel Suman (OSS)" <viorel.suman@oss.nxp.com>
+Date: Wed, 11 Mar 2026 14:33:09 +0200
+Subject: pwm: imx-tpm: Count the number of enabled channels in probe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+
+commit 3962c24f2d14e8a7f8a23f56b7ce320523947342 upstream.
+
+On a soft reset TPM PWM IP may preserve its internal state from previous
+runtime, therefore on a subsequent OS boot and driver probe
+"enable_count" value and TPM PWM IP internal channels "enabled" states
+may get unaligned. In consequence on a suspend/resume cycle the call "if
+(--tpm->enable_count == 0)" may lead to "enable_count" overflow the
+system being blocked from entering suspend due to:
+
+   if (tpm->enable_count > 0)
+       return -EBUSY;
+
+Fix the problem by counting the enabled channels in probe function.
+
+Signed-off-by: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support")
+Link: https://patch.msgid.link/20260311123309.348904-1-viorel.suman@oss.nxp.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pwm/pwm-imx-tpm.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/pwm/pwm-imx-tpm.c
++++ b/drivers/pwm/pwm-imx-tpm.c
+@@ -352,7 +352,7 @@ static int pwm_imx_tpm_probe(struct plat
+       struct clk *clk;
+       void __iomem *base;
+       int ret;
+-      unsigned int npwm;
++      unsigned int i, npwm;
+       u32 val;
+       base = devm_platform_ioremap_resource(pdev, 0);
+@@ -382,6 +382,13 @@ static int pwm_imx_tpm_probe(struct plat
+       mutex_init(&tpm->lock);
++      /* count the enabled channels */
++      for (i = 0; i < npwm; ++i) {
++              val = readl(base + PWM_IMX_TPM_CnSC(i));
++              if (FIELD_GET(PWM_IMX_TPM_CnSC_ELS, val))
++                      ++tpm->enable_count;
++      }
++
+       ret = devm_pwmchip_add(&pdev->dev, chip);
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
diff --git a/queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch b/queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch
new file mode 100644 (file)
index 0000000..92783ea
--- /dev/null
@@ -0,0 +1,155 @@
+From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 3 Mar 2026 15:08:38 +0000
+Subject: randomize_kstack: Maintain kstack_offset per task
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 37beb42560165869838e7d91724f3e629db64129 upstream.
+
+kstack_offset was previously maintained per-cpu, but this caused a
+couple of issues. So let's instead make it per-task.
+
+Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
+expected and required to be called with interrupts and preemption
+disabled so that it could manipulate per-cpu state. But arm64, loongarch
+and risc-v are calling them with interrupts and preemption enabled. I
+don't _think_ this causes any functional issues, but it's certainly
+unexpected and could lead to manipulating the wrong cpu's state, which
+could cause a minor performance degradation due to bouncing the cache
+lines. By maintaining the state per-task those functions can safely be
+called in preemptible context.
+
+Issue 2: add_random_kstack_offset() is called before executing the
+syscall and expands the stack using a previously chosen random offset.
+choose_random_kstack_offset() is called after executing the syscall and
+chooses and stores a new random offset for the next syscall. With
+per-cpu storage for this offset, an attacker could force cpu migration
+during the execution of the syscall and prevent the offset from being
+updated for the original cpu such that it is predictable for the next
+syscall on that cpu. By maintaining the state per-task, this problem
+goes away because the per-task random offset is updated after the
+syscall regardless of which cpu it is executing on.
+
+Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
+Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
+Cc: stable@vger.kernel.org
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/randomize_kstack.h |   26 +++++++++++++++-----------
+ include/linux/sched.h            |    4 ++++
+ init/main.c                      |    1 -
+ kernel/fork.c                    |    2 ++
+ 4 files changed, 21 insertions(+), 12 deletions(-)
+
+--- a/include/linux/randomize_kstack.h
++++ b/include/linux/randomize_kstack.h
+@@ -9,7 +9,6 @@
+ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                        randomize_kstack_offset);
+-DECLARE_PER_CPU(u32, kstack_offset);
+ /*
+  * Do not use this anywhere else in the kernel. This is used here because
+@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * add_random_kstack_offset - Increase stack utilization by previously
+  *                          chosen random offset
+  *
+- * This should be used in the syscall entry path when interrupts and
+- * preempt are disabled, and after user registers have been stored to
+- * the stack. For testing the resulting entropy, please see:
+- * tools/testing/selftests/lkdtm/stack-entropy.sh
++ * This should be used in the syscall entry path after user registers have been
++ * stored to the stack. Preemption may be enabled. For testing the resulting
++ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
+  */
+ #define add_random_kstack_offset() do {                                       \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));   \
+               /* Keep allocation even after "ptr" loses scope. */     \
+               asm volatile("" :: "r"(ptr) : "memory");                \
+@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * choose_random_kstack_offset - Choose the random offset for the next
+  *                             add_random_kstack_offset()
+  *
+- * This should only be used during syscall exit when interrupts and
+- * preempt are disabled. This position in the syscall flow is done to
+- * frustrate attacks from userspace attempting to learn the next offset:
++ * This should only be used during syscall exit. Preemption may be enabled. This
++ * position in the syscall flow is done to frustrate attacks from userspace
++ * attempting to learn the next offset:
+  * - Maximize the timing uncertainty visible from userspace: if the
+  *   offset is chosen at syscall entry, userspace has much more control
+  *   over the timing between choosing offsets. "How long will we be in
+@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ #define choose_random_kstack_offset(rand) do {                                \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               offset = ror32(offset, 5) ^ (rand);                     \
+-              raw_cpu_write(kstack_offset, offset);                   \
++              current->kstack_offset = offset;                        \
+       }                                                               \
+ } while (0)
++
++static inline void random_kstack_task_init(struct task_struct *tsk)
++{
++      tsk->kstack_offset = 0;
++}
+ #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #define add_random_kstack_offset()            do { } while (0)
+ #define choose_random_kstack_offset(rand)     do { } while (0)
++#define random_kstack_task_init(tsk)          do { } while (0)
+ #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1592,6 +1592,10 @@ struct task_struct {
+       unsigned long                   prev_lowest_stack;
+ #endif
++#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
++      u32                             kstack_offset;
++#endif
++
+ #ifdef CONFIG_X86_MCE
+       void __user                     *mce_vaddr;
+       __u64                           mce_kflags;
+--- a/init/main.c
++++ b/init/main.c
+@@ -833,7 +833,6 @@ static inline void initcall_debug_enable
+ #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                          randomize_kstack_offset);
+-DEFINE_PER_CPU(u32, kstack_offset);
+ static int __init early_randomize_kstack_offset(char *buf)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -95,6 +95,7 @@
+ #include <linux/thread_info.h>
+ #include <linux/kstack_erase.h>
+ #include <linux/kasan.h>
++#include <linux/randomize_kstack.h>
+ #include <linux/scs.h>
+ #include <linux/io_uring.h>
+ #include <linux/io_uring_types.h>
+@@ -2233,6 +2234,7 @@ __latent_entropy struct task_struct *cop
+       if (retval)
+               goto bad_fork_cleanup_io;
++      random_kstack_task_init(p);
+       stackleak_task_init(p);
+       if (pid != &init_struct_pid) {
diff --git a/queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch b/queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch
new file mode 100644 (file)
index 0000000..06f8ab7
--- /dev/null
@@ -0,0 +1,41 @@
+From 30c4d2f26bb3538c328035cea2e6265c8320539e Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 7 Apr 2026 14:27:17 +0200
+Subject: rtc: ntxec: fix OF node reference imbalance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream.
+
+The driver reuses the OF node of the parent multi-function device but
+fails to take another reference to balance the one dropped by the
+platform bus code when unbinding the MFD and deregistering the child
+devices.
+
+Fix this by using the intended helper for reusing OF nodes.
+
+Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller")
+Cc: stable@vger.kernel.org     # 5.13
+Cc: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/rtc-ntxec.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/rtc-ntxec.c
++++ b/drivers/rtc/rtc-ntxec.c
+@@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platfo
+       struct rtc_device *dev;
+       struct ntxec_rtc *rtc;
+-      pdev->dev.of_node = pdev->dev.parent->of_node;
++      device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+       rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+       if (!rtc)
index ad00972e9ec68a5503d208d0ef733a3463e1defe..5258dabea15cc2d00b11aeff7fdd235f06fad30d 100644 (file)
@@ -168,3 +168,61 @@ media-rzv2h-ivc-fix-axirx_vblank-register-write.patch
 fs-prepare-for-adding-lsm-blob-to-backing_file.patch
 lsm-add-backing_file-lsm-hooks.patch
 selinux-fix-overlayfs-mmap-and-mprotect-access-checks.patch
+hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
+randomize_kstack-maintain-kstack_offset-per-task.patch
+mmc-block-use-single-block-write-in-retry.patch
+mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
+arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
+crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
+xfs-start-gc-on-zonegc_low_space-attribute-updates.patch
+xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
+firmware-google-framebuffer-do-not-unregister-platform-device.patch
+firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch
+crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
+crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
+pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
+tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
+tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
+tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
+tpm-tpm_tis-add-error-logging-for-data-transfer.patch
+tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
+rtc-ntxec-fix-of-node-reference-imbalance.patch
+mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
+mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch
+mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
+mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
+mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
+mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch
+mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
+mm-damon-core-disallow-time-quota-setting-zero-esz.patch
+mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch
+userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
+loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
+kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
+kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
+kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
+kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
+kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
+kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
+kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
+kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
+kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch
+kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
+kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
+kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
+kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
+kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
+kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
+kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
+kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch
+kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
+kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
+kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
+kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
+kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
+kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
+kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
+kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
+kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
diff --git a/queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch b/queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
new file mode 100644 (file)
index 0000000..5a3fc6e
--- /dev/null
@@ -0,0 +1,47 @@
+From 666c1a2ca603d8314231200bf8bbb3a81bd64c6b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 8 Apr 2026 12:00:27 +0300
+Subject: tpm: Fix auth session leak in tpm2_get_random() error path
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit 666c1a2ca603d8314231200bf8bbb3a81bd64c6b upstream.
+
+When tpm_buf_fill_hmac_session() fails inside the do-while loop in
+tpm2_get_random(), the function returns directly after destroying the
+buffer, without ending the auth session via tpm2_end_auth_session().
+
+This leaks the TPM auth session resource. All other error paths within
+the loop correctly reach the 'out' label which calls both
+tpm_buf_destroy() and tpm2_end_auth_session().
+
+Fix this by replacing the early return with a goto to the existing 'out'
+label, which already handles both cleanup operations. The redundant
+tpm_buf_destroy() call is removed since 'out' takes care of it.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: 6e9722e9a7bf ("tpm2-sessions: Fix out of range indexing in name_size")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-cmd.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -295,10 +295,8 @@ int tpm2_get_random(struct tpm_chip *chi
+               }
+               tpm_buf_append_u16(&buf, num_bytes);
+               err = tpm_buf_fill_hmac_session(chip, &buf);
+-              if (err) {
+-                      tpm_buf_destroy(&buf);
+-                      return err;
+-              }
++              if (err)
++                      goto out;
+               err = tpm_transmit_cmd(chip, &buf,
+                                      offsetof(struct tpm2_get_random_out,
diff --git a/queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch b/queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch
new file mode 100644 (file)
index 0000000..e7fad8b
--- /dev/null
@@ -0,0 +1,42 @@
+From 0471921e2d1043dcc6de5cffb49dd37709521abe Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:05 +0000
+Subject: tpm: tpm_tis: add error logging for data transfer
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream.
+
+Add logging to more easily determine reason for transmit failure
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -471,6 +471,8 @@ static int tpm_tis_send_data(struct tpm_
+               status = tpm_tis_status(chip);
+               if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) {
+                       rc = -EIO;
++                      dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n",
++                              status);
+                       goto out_err;
+               }
+       }
+@@ -491,6 +493,8 @@ static int tpm_tis_send_data(struct tpm_
+       status = tpm_tis_status(chip);
+       if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) {
+               rc = -EIO;
++              dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n",
++                      status);
+               goto out_err;
+       }
diff --git a/queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch b/queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
new file mode 100644 (file)
index 0000000..a110474
--- /dev/null
@@ -0,0 +1,48 @@
+From 949692da7211572fac419b2986b6abc0cd1aeb76 Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:06 +0000
+Subject: tpm: tpm_tis: stop transmit if retries are exhausted
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 949692da7211572fac419b2986b6abc0cd1aeb76 upstream.
+
+tpm_tis_send_main() will attempt to retry sending data TPM_RETRY times.
+Currently, if those retries are exhausted, the driver will attempt to
+call execute. The TPM will be in the wrong state, leading to the
+operation simply timing out.
+
+Instead, if there is still an error after retries are exhausted, return
+that error immediately.
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-3-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -556,11 +556,16 @@ static int tpm_tis_send_main(struct tpm_
+                       break;
+               else if (rc != -EAGAIN && rc != -EIO)
+                       /* Data transfer failed, not recoverable */
+-                      return rc;
++                      goto out_err;
+               usleep_range(priv->timeout_min, priv->timeout_max);
+       }
++      if (rc == -EAGAIN || rc == -EIO) {
++              dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n", TPM_RETRY);
++              goto out_err;
++      }
++
+       /* go and do it */
+       rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO);
+       if (rc < 0)
diff --git a/queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch b/queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
new file mode 100644 (file)
index 0000000..ecac4ef
--- /dev/null
@@ -0,0 +1,44 @@
+From c424d2664f08c77f08b4580b5f0cbaabf7c229b2 Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Thu, 9 Apr 2026 17:20:54 +0000
+Subject: tpm: Use kfree_sensitive() to free auth session in tpm_dev_release()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit c424d2664f08c77f08b4580b5f0cbaabf7c229b2 upstream.
+
+tpm_dev_release() uses plain kfree() to free chip->auth, which contains
+sensitive cryptographic material including HMAC session keys, nonces,
+and passphrase data (struct tpm2_auth).
+
+Every other code path that frees this structure uses kfree_sensitive()
+to zero the memory before releasing it: both tpm2_end_auth_session()
+and tpm_buf_check_hmac_response() do so. The tpm_dev_release() path
+is the only one that does not, leaving key material in freed slab
+memory until it is eventually overwritten.
+
+Use kfree_sensitive() for consistency with the rest of the driver and
+to ensure session keys are scrubbed during device teardown.
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm-chip.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm-chip.c
++++ b/drivers/char/tpm/tpm-chip.c
+@@ -247,7 +247,7 @@ static void tpm_dev_release(struct devic
+       kfree(chip->work_space.context_buf);
+       kfree(chip->work_space.session_buf);
+ #ifdef CONFIG_TCG_TPM2_HMAC
+-      kfree(chip->auth);
++      kfree_sensitive(chip->auth);
+ #endif
+       kfree(chip);
+ }
diff --git a/queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch b/queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
new file mode 100644 (file)
index 0000000..4b6f220
--- /dev/null
@@ -0,0 +1,57 @@
+From f0f75a3d98b7959a8677b6363e23190f3018636b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 15 Apr 2026 03:00:03 +0300
+Subject: tpm2-sessions: Fix missing tpm_buf_destroy() in tpm2_read_public()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit f0f75a3d98b7959a8677b6363e23190f3018636b upstream.
+
+tpm2_read_public() calls tpm_buf_init() but fails to call
+tpm_buf_destroy() on two exit paths, leaking a page allocation:
+
+1. When name_size() returns an error (unrecognized hash algorithm),
+   the function returns directly without destroying the buffer.
+
+2. On the success path, the buffer is never destroyed before
+   returning.
+
+All other error paths in the function correctly call
+tpm_buf_destroy() before returning.
+
+Fix both by adding the missing tpm_buf_destroy() calls.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: bda1cbf73c6e ("tpm2-sessions: Fix tpm2_read_public range checks")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-sessions.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -203,8 +203,10 @@ static int tpm2_read_public(struct tpm_c
+       rc = tpm_buf_read_u16(&buf, &offset);
+       name_size_alg = name_size(&buf.data[offset]);
+-      if (name_size_alg < 0)
++      if (name_size_alg < 0) {
++              tpm_buf_destroy(&buf);
+               return name_size_alg;
++      }
+       if (rc != name_size_alg) {
+               tpm_buf_destroy(&buf);
+@@ -217,6 +219,7 @@ static int tpm2_read_public(struct tpm_c
+       }
+       memcpy(name, &buf.data[offset], rc);
++      tpm_buf_destroy(&buf);
+       return name_size_alg;
+ }
+ #endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch b/queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
new file mode 100644 (file)
index 0000000..0995109
--- /dev/null
@@ -0,0 +1,60 @@
+From 161ce69c2c89781784b945d8e281ff2da9dede9c Mon Sep 17 00:00:00 2001
+From: "Denis M. Karpov" <komlomal@gmail.com>
+Date: Thu, 9 Apr 2026 13:33:45 +0300
+Subject: userfaultfd: allow registration of ranges below mmap_min_addr
+
+From: Denis M. Karpov <komlomal@gmail.com>
+
+commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream.
+
+The current implementation of validate_range() in fs/userfaultfd.c
+performs a hard check against mmap_min_addr.  This is redundant because
+UFFDIO_REGISTER operates on memory ranges that must already be backed by a
+VMA.
+
+Enforcing mmap_min_addr or capability checks again in userfaultfd is
+unnecessary and prevents applications like binary compilers from using
+UFFD for valid memory regions mapped by application.
+
+Remove the redundant check for mmap_min_addr.
+
+We started using UFFD instead of the classic mprotect approach in the
+binary translator to track application writes.  During development, we
+encountered this bug.  The translator cannot control where the translated
+application chooses to map its memory and if the app requires a
+low-address area, UFFD fails, whereas mprotect would work just fine.  I
+believe this is a genuine logic bug rather than an improvement, and I
+would appreciate including the fix in stable.
+
+Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com
+Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
+Signed-off-by: Denis M. Karpov <komlomal@gmail.com>
+Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
+Acked-by: Harry Yoo (Oracle) <harry@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jann Horn <jannh@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1238,8 +1238,6 @@ static __always_inline int validate_unal
+               return -EINVAL;
+       if (!len)
+               return -EINVAL;
+-      if (start < mmap_min_addr)
+-              return -EINVAL;
+       if (start >= task_size)
+               return -EINVAL;
+       if (len > task_size - start)
diff --git a/queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch b/queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
new file mode 100644 (file)
index 0000000..0339693
--- /dev/null
@@ -0,0 +1,32 @@
+From 29a7b2614357393b176ef06ba5bc3ff5afc8df69 Mon Sep 17 00:00:00 2001
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Date: Wed, 1 Apr 2026 12:02:41 +0800
+Subject: xfs: fix a resource leak in xfs_alloc_buftarg()
+
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+
+commit 29a7b2614357393b176ef06ba5bc3ff5afc8df69 upstream.
+
+In the error path, call fs_put_dax() to drop the DAX
+device reference.
+
+Fixes: 6f643c57d57c ("xfs: implement ->notify_failure() for XFS")
+Cc: stable@vger.kernel.org
+Signed-off-by: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1831,6 +1831,7 @@ xfs_alloc_buftarg(
+       return btp;
+ error_free:
++      fs_put_dax(btp->bt_daxdev, mp);
+       kfree(btp);
+       return ERR_PTR(error);
+ }
diff --git a/queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch b/queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch
new file mode 100644 (file)
index 0000000..4d01219
--- /dev/null
@@ -0,0 +1,104 @@
+From 181ea4e2de422aa0a66f355bd59bccccdd169826 Mon Sep 17 00:00:00 2001
+From: Hans Holmberg <hans.holmberg@wdc.com>
+Date: Wed, 25 Mar 2026 13:43:12 +0100
+Subject: xfs: start gc on zonegc_low_space attribute updates
+
+From: Hans Holmberg <hans.holmberg@wdc.com>
+
+commit 181ea4e2de422aa0a66f355bd59bccccdd169826 upstream.
+
+Start gc if the aggressiveness of zone garbage collection is changed
+by the user (if the file system is not read only).
+
+Without this change, the new setting will not be taken into account
+until the gc thread is woken up by e.g. a write.
+
+Cc: stable@vger.kernel.org # v6.15
+Fixes: 845abeb1f06a8a ("xfs: add tunable threshold parameter for triggering zone GC")
+Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_sysfs.c      |    7 ++++++-
+ fs/xfs/xfs_zone_alloc.h |    4 ++++
+ fs/xfs/xfs_zone_gc.c    |   17 +++++++++++++++++
+ 3 files changed, 27 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_sysfs.c
++++ b/fs/xfs/xfs_sysfs.c
+@@ -14,6 +14,7 @@
+ #include "xfs_log_priv.h"
+ #include "xfs_mount.h"
+ #include "xfs_zones.h"
++#include "xfs_zone_alloc.h"
+ struct xfs_sysfs_attr {
+       struct attribute attr;
+@@ -724,6 +725,7 @@ zonegc_low_space_store(
+       const char              *buf,
+       size_t                  count)
+ {
++      struct xfs_mount        *mp = zoned_to_mp(kobj);
+       int                     ret;
+       unsigned int            val;
+@@ -734,7 +736,10 @@ zonegc_low_space_store(
+       if (val > 100)
+               return -EINVAL;
+-      zoned_to_mp(kobj)->m_zonegc_low_space = val;
++      if (mp->m_zonegc_low_space != val) {
++              mp->m_zonegc_low_space = val;
++              xfs_zone_gc_wakeup(mp);
++      }
+       return count;
+ }
+--- a/fs/xfs/xfs_zone_alloc.h
++++ b/fs/xfs/xfs_zone_alloc.h
+@@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp
+ void xfs_unmount_zones(struct xfs_mount *mp);
+ void xfs_zone_gc_start(struct xfs_mount *mp);
+ void xfs_zone_gc_stop(struct xfs_mount *mp);
++void xfs_zone_gc_wakeup(struct xfs_mount *mp);
+ #else
+ static inline int xfs_mount_zones(struct xfs_mount *mp)
+ {
+@@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(str
+ static inline void xfs_zone_gc_stop(struct xfs_mount *mp)
+ {
+ }
++static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp)
++{
++}
+ #endif /* CONFIG_XFS_RT */
+ #endif /* _XFS_ZONE_ALLOC_H */
+--- a/fs/xfs/xfs_zone_gc.c
++++ b/fs/xfs/xfs_zone_gc.c
+@@ -1159,6 +1159,23 @@ xfs_zone_gc_stop(
+               kthread_park(mp->m_zone_info->zi_gc_thread);
+ }
++void
++xfs_zone_gc_wakeup(
++      struct xfs_mount        *mp)
++{
++      struct super_block      *sb = mp->m_super;
++
++      /*
++       * If we are unmounting the file system we must not try to
++       * wake gc as m_zone_info might have been freed already.
++       */
++      if (down_read_trylock(&sb->s_umount)) {
++              if (!xfs_is_readonly(mp))
++                      wake_up_process(mp->m_zone_info->zi_gc_thread);
++              up_read(&sb->s_umount);
++      }
++}
++
+ int
+ xfs_zone_gc_mount(
+       struct xfs_mount        *mp)